kernel_samsung_sm7125/arch/sparc64/kernel/dtlb_base.S

/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $
 * dtlb_base.S:	Front end to DTLB miss replacement strategy.
 *              This is included directly into the trap table.
 *
 * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
 * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
 */

#include <asm/pgtable.h>
#include <asm/mmu.h>

/* %g1	TLB_SFSR	(%g1 + %g1 == TLB_TAG_ACCESS)
 * %g2	(KERN_HIGHBITS | KERN_LOWBITS)
 * %g3  VPTE base	(0xfffffffe00000000)	Spitfire/Blackbird (44-bit VA space)
 *			(0xffe0000000000000)	Cheetah		   (64-bit VA space)
 * %g7	__pa(current->mm->pgd)
 *
 * The VPTE base value is completely magic, but note that
 * few places in the kernel other than these TLB miss
 * handlers know anything about the VPTE mechanism or
 * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD).
 * Consider the 44-bit VADDR Ultra-I/II case as an example:
 *
 * VA[0 :  (1<<43)] produce VPTE index [%g3                        :   0]
 * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3]
 *
 * For Cheetah's 64-bit VADDR space this is:
 *
 * VA[0 :  (1<<63)] produce VPTE index [%g3                        :   0]
 * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3]
 *
 * If you're paying attention you'll notice that this means half of
 * the VPTE table is above %g3 and half is below, low VA addresses
 * map progressively upwards from %g3, and high VA addresses map
 * progressively upwards towards %g3.  This trick was needed to make
 * the same 8 instruction handler work both for Spitfire/Blackbird's
 * peculiar VA space hole configuration and the full 64-bit VA space
 * one of Cheetah at the same time.
 */

/* Ways we can get here:
 *
 * 1) Nucleus loads and stores to/from PA-->VA direct mappings.
 * 2) Nucleus loads and stores to/from vmalloc() areas.
 * 3) User loads and stores.
 * 4) User space accesses by nucleus at tl0
 */

/*
 * VPTE offset = ((vaddr >> PAGE_SHIFT) << 3), produced by two macros
 * that each expand to exactly one instruction.  The first argument is
 * the source register, the second receives the offset.
 */
#if PAGE_SHIFT != 13
/* Generic page size: shift down by PAGE_SHIFT, then scale by 8. */
#define CREATE_VPTE_OFFSET1(vaddr, voff)	srax	vaddr, PAGE_SHIFT, voff
#define CREATE_VPTE_OFFSET2(vaddr, voff)	sllx	voff, 3, voff
#else
/*
 * 8K pages: ((addr >> 13) << 3) can be folded into a single
 * (addr >> 10) as long as bits 10/11/12 of the source are zero.
 * mmu_context.h guarantees that by using only 10 bits in the
 * hwcontext value, so the first half degenerates to a nop.
 */
#define CREATE_VPTE_OFFSET1(vaddr, voff)	nop
#define CREATE_VPTE_OFFSET2(vaddr, voff)	srax	vaddr, 10, voff
#endif

/*
 * D-TLB miss fast path (first two groups of eight instructions).
 *
 * The handler is laid out in groups of eight instructions, one group
 * per "ICACHE line" banner.  CREATE_VPTE_OFFSET1/2 each expand to
 * exactly one instruction, so the count is the same for every
 * PAGE_SHIFT.  Do not add, remove or reorder instructions without
 * re-checking that layout, the delay slots and the condition-code
 * dependencies noted below.
 *
 * Flow:
 *   - read TAG_ACCESS into %g4 and test its context field;
 *   - context bits all zero (nucleus): branch to kvmap;
 *   - otherwise, unless %tl == 4, load the VPTE at %g3 + %g6 and, if
 *     it is valid, write it into the D-TLB and retry the access;
 *   - %tl == 4, or an invalid VPTE, goes to longpath.
 */
/* DTLB ** ICACHE line 1: Quick user TLB misses		*/
	/* Load %g1 here so that %g1 + %g1 forms TLB_TAG_ACCESS below. */
	mov		TLB_SFSR, %g1
	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! Get TAG_ACCESS
	/* Result is discarded (%g0); only the condition codes matter. */
	andcc		%g4, TAG_CONTEXT_BITS, %g0	! From Nucleus?
/*
 * NOTE(review): from_tl1_trap is not referenced in this file.  It is
 * presumably an entry point for code that arrives with %g4 and the
 * condition codes already set up as above -- confirm against its users.
 */
from_tl1_trap:
	rdpr		%tl, %g5			! For TL==4 test below
	CREATE_VPTE_OFFSET1(%g4, %g6)			! Create VPTE offset
	/*
	 * Still testing the andcc result: nothing in between writes the
	 * condition codes.  The branch is not annulled, so the second half
	 * of the offset computation in the delay slot always executes.
	 */
	be,pn		%xcc, kvmap			! Yep, special processing
	 CREATE_VPTE_OFFSET2(%g4, %g6)			! Create VPTE offset
	cmp		%g5, 4				! Last trap level?

/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses	*/
	be,pn		%xcc, longpath			! Yep, cannot risk VPTE miss
	 nop						! delay slot
	/* %g3 is the VPTE base, %g6 the offset computed above. */
	ldxa		[%g3 + %g6] ASI_S, %g5		! Load VPTE
	/*
	 * brgez: a non-negative value (top bit clear) is treated as an
	 * invalid entry.  NOTE(review): numeric labels 1 and 9 are not
	 * referenced within this file -- confirm who relies on them.
	 */
1:	brgez,pn	%g5, longpath			! Invalid, branch out
	 nop						! Delay-slot
9:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
	retry						! Trap return
	/* Padding: brings this group up to eight instructions. */
	nop

/*
 * longpath: slow path for misses the VPTE fast path did not satisfy
 * (%tl == 4, or the loaded VPTE was not valid).
 *
 * Switches global register sets, then leaves with
 *   %g4 = FAULT_CODE_DTLB
 *   %g5 = contents of the D-MMU TLB_TAG_ACCESS register
 * and continues at sparc64_realfault_common when %tl == 1, or at
 * winfix_trampoline for any other trap level.
 */
/* DTLB ** ICACHE line 3: winfixups+real_faults		*/
longpath:
	rdpr		%pstate, %g5			! Move into alternate globals
	/*
	 * wrpr writes (%g5 XOR immediate), so both PSTATE_AG and PSTATE_MG
	 * are toggled relative to the value just read.  From here on %g4
	 * and %g5 name registers in the newly selected set -- presumably
	 * why TAG_ACCESS is read again below (TODO confirm).
	 */
	wrpr		%g5, PSTATE_AG|PSTATE_MG, %pstate
	rdpr		%tl, %g4			! See where we came from.
	cmp		%g4, 1				! Is etrap/rtrap window fault?
	/*
	 * %g4 is reused immediately; mov and ldxa leave the condition
	 * codes alone, so the cmp result is still live at the be below.
	 */
	mov		TLB_TAG_ACCESS, %g4		! Prepare for fault processing
	ldxa		[%g4] ASI_DMMU, %g5		! Load faulting VA page
	/*
	 * Taken when %tl == 1.  The branch is not annulled, so the delay
	 * slot sets %g4 on both the taken and the fall-through path.
	 */
	be,pt		%xcc, sparc64_realfault_common	! Jump to normal fault handling
	 mov		FAULT_CODE_DTLB, %g4		! It was read from DTLB

/* DTLB ** ICACHE line 4: Unused...	*/
	/*
	 * Annulled unconditional branch: the instruction in its delay slot
	 * is not executed.  The nops that follow are padding that brings
	 * this last group up to eight instructions.
	 */
	ba,a,pt		%xcc, winfix_trampoline		! Call window fixup code
	nop
	nop
	nop
	nop
	nop
	nop
	nop

/*
 * This file is included directly into the trap table; remove the
 * helper macros so they stay undefined for whatever follows the include.
 */
#undef CREATE_VPTE_OFFSET1
#undef CREATE_VPTE_OFFSET2
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago			`/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $`
			`* dtlb_base.S: Front end to DTLB miss replacement strategy.`
			`* This is included directly into the trap table.`
			`*`
			`* Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)`
			`* Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)`
			`*/`

			`#include <asm/pgtable.h>`
			`#include <asm/mmu.h>`

			`/* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS)`
			`* %g2 (KERN_HIGHBITS \| KERN_LOWBITS)`
			`* %g3 VPTE base (0xfffffffe00000000) Spitfire/Blackbird (44-bit VA space)`
			`* (0xffe0000000000000) Cheetah (64-bit VA space)`
			`* %g7 __pa(current->mm->pgd)`
			`*`
			`* The VPTE base value is completely magic, but note that`
			`* few places in the kernel other than these TLB miss`
			`* handlers know anything about the VPTE mechanism or`
			`* how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD).`
			`* Consider the 44-bit VADDR Ultra-I/II case as an example:`
			`*`
			`* VA[0 : (1<<43)] produce VPTE index [%g3 : 0]`
			`* VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3]`
			`*`
			`* For Cheetah's 64-bit VADDR space this is:`
			`*`
			`* VA[0 : (1<<63)] produce VPTE index [%g3 : 0]`
			`* VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3]`
			`*`
			`* If you're paying attention you'll notice that this means half of`
			`* the VPTE table is above %g3 and half is below, low VA addresses`
			`* map progressively upwards from %g3, and high VA addresses map`
			`* progressively upwards towards %g3. This trick was needed to make`
			`* the same 8 instruction handler work both for Spitfire/Blackbird's`
			`* peculiar VA space hole configuration and the full 64-bit VA space`
			`* one of Cheetah at the same time.`
			`*/`

			`/* Ways we can get here:`
			`*`
			`* 1) Nucleus loads and stores to/from PA-->VA direct mappings.`
			`* 2) Nucleus loads and stores to/from vmalloc() areas.`
			`* 3) User loads and stores.`
			`* 4) User space accesses by nucleus at tl0`
			`*/`

			`#if PAGE_SHIFT == 13`
			`/*`
			`* To compute vpte offset, we need to do ((addr >> 13) << 3),`
			`* which can be optimized to (addr >> 10) if bits 10/11/12 can`
			`* be guaranteed to be 0 ... mmu_context.h does guarantee this`
			`* by only using 10 bits in the hwcontext value.`
			`*/`
[SPARC64]: Fix boot failures on SunBlade-150 The sequence to move over to the Linux trap tables from the firmware ones needs to be more air tight. It turns out that to be %100 safe we do need to be able to translate OBP mappings in our TLB miss handlers early. In order not to eat up a lot of kernel image memory with static page tables, just use the translations array in the OBP TLB miss handlers. That solves the bulk of the problem. Furthermore, to make sure the OBP TLB miss path will work even before the fixed MMU globals are loaded, explicitly load %g1 to TLB_SFSR at the beginning of the i-TLB and d-TLB miss handlers. To ease the OBP TLB miss walking of the prom_trans[] array, we sort it then delete all of the non-OBP entries in there (for example, there are entries for the kernel image itself which we're not interested in at all). We also save about 32K of kernel image size with this change. Not a bad side effect :-) There are still some reasons why trampoline.S can't use the setup_trap_table() yet. The most noteworthy are: 1) OBP boots secondary processors with non-bias'd stack for some reason. This is easily fixed by using a small bootup stack in the kernel image explicitly for this purpose. 2) Doing a firmware call via the normal C call prom_set_trap_table() goes through the whole OBP enter/exit sequence that saves and restores OBP and Linux kernel state in the MMUs. This path unfortunately does a "flush %g6" while loading up the OBP locked TLB entries for the firmware call. If we setup the %g6 in the trampoline.S code properly, that is in the PAGE_OFFSET linear mapping, but we're not on the kernel trap table yet so those addresses won't translate properly. One idea is to do a by-hand firmware call like we do in the early bootup code and elsewhere here in trampoline.S But this fails as well, as aparently the secondary processors are not booted with OBP's special locked TLB entries loaded. 
These are necessary for the firwmare to processes TLB misses correctly up until the point where we take over the trap table. This does need to be resolved at some point. Signed-off-by: David S. Miller <davem@davemloft.net> 19 years ago			`#define CREATE_VPTE_OFFSET1(r1, r2) nop`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago			`#define CREATE_VPTE_OFFSET2(r1, r2) \`
			`srax r1, 10, r2`
			`#else`
			`#define CREATE_VPTE_OFFSET1(r1, r2) \`
			`srax r1, PAGE_SHIFT, r2`
			`#define CREATE_VPTE_OFFSET2(r1, r2) \`
			`sllx r2, 3, r2`
			`#endif`

			`/* DTLB ** ICACHE line 1: Quick user TLB misses */`
[SPARC64]: Fix boot failures on SunBlade-150 The sequence to move over to the Linux trap tables from the firmware ones needs to be more air tight. It turns out that to be %100 safe we do need to be able to translate OBP mappings in our TLB miss handlers early. In order not to eat up a lot of kernel image memory with static page tables, just use the translations array in the OBP TLB miss handlers. That solves the bulk of the problem. Furthermore, to make sure the OBP TLB miss path will work even before the fixed MMU globals are loaded, explicitly load %g1 to TLB_SFSR at the beginning of the i-TLB and d-TLB miss handlers. To ease the OBP TLB miss walking of the prom_trans[] array, we sort it then delete all of the non-OBP entries in there (for example, there are entries for the kernel image itself which we're not interested in at all). We also save about 32K of kernel image size with this change. Not a bad side effect :-) There are still some reasons why trampoline.S can't use the setup_trap_table() yet. The most noteworthy are: 1) OBP boots secondary processors with non-bias'd stack for some reason. This is easily fixed by using a small bootup stack in the kernel image explicitly for this purpose. 2) Doing a firmware call via the normal C call prom_set_trap_table() goes through the whole OBP enter/exit sequence that saves and restores OBP and Linux kernel state in the MMUs. This path unfortunately does a "flush %g6" while loading up the OBP locked TLB entries for the firmware call. If we setup the %g6 in the trampoline.S code properly, that is in the PAGE_OFFSET linear mapping, but we're not on the kernel trap table yet so those addresses won't translate properly. One idea is to do a by-hand firmware call like we do in the early bootup code and elsewhere here in trampoline.S But this fails as well, as aparently the secondary processors are not booted with OBP's special locked TLB entries loaded. 
These are necessary for the firwmare to processes TLB misses correctly up until the point where we take over the trap table. This does need to be resolved at some point. Signed-off-by: David S. Miller <davem@davemloft.net> 19 years ago			`mov TLB_SFSR, %g1`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago			`ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS`
			`andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus?`
			`from_tl1_trap:`
			`rdpr %tl, %g5 ! For TL==3 test`
			`CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset`
[SPARC64]: Move kernel TLB miss handling into a seperate file. Signed-off-by: David S. Miller <davem@davemloft.net> 20 years ago			`be,pn %xcc, kvmap ! Yep, special processing`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago			`CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset`
			`cmp %g5, 4 ! Last trap level?`

			`/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */`
[SPARC64]: Fix boot failures on SunBlade-150 The sequence to move over to the Linux trap tables from the firmware ones needs to be more air tight. It turns out that to be %100 safe we do need to be able to translate OBP mappings in our TLB miss handlers early. In order not to eat up a lot of kernel image memory with static page tables, just use the translations array in the OBP TLB miss handlers. That solves the bulk of the problem. Furthermore, to make sure the OBP TLB miss path will work even before the fixed MMU globals are loaded, explicitly load %g1 to TLB_SFSR at the beginning of the i-TLB and d-TLB miss handlers. To ease the OBP TLB miss walking of the prom_trans[] array, we sort it then delete all of the non-OBP entries in there (for example, there are entries for the kernel image itself which we're not interested in at all). We also save about 32K of kernel image size with this change. Not a bad side effect :-) There are still some reasons why trampoline.S can't use the setup_trap_table() yet. The most noteworthy are: 1) OBP boots secondary processors with non-bias'd stack for some reason. This is easily fixed by using a small bootup stack in the kernel image explicitly for this purpose. 2) Doing a firmware call via the normal C call prom_set_trap_table() goes through the whole OBP enter/exit sequence that saves and restores OBP and Linux kernel state in the MMUs. This path unfortunately does a "flush %g6" while loading up the OBP locked TLB entries for the firmware call. If we setup the %g6 in the trampoline.S code properly, that is in the PAGE_OFFSET linear mapping, but we're not on the kernel trap table yet so those addresses won't translate properly. One idea is to do a by-hand firmware call like we do in the early bootup code and elsewhere here in trampoline.S But this fails as well, as aparently the secondary processors are not booted with OBP's special locked TLB entries loaded. 
These are necessary for the firwmare to processes TLB misses correctly up until the point where we take over the trap table. This does need to be resolved at some point. Signed-off-by: David S. Miller <davem@davemloft.net> 19 years ago			`be,pn %xcc, longpath ! Yep, cannot risk VPTE miss`
			`nop ! delay slot`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago			`ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE`
			`1: brgez,pn %g5, longpath ! Invalid, branch out`
			`nop ! Delay-slot`
			`9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB`
			`retry ! Trap return`
[SPARC64]: Move kernel TLB miss handling into a seperate file. Signed-off-by: David S. Miller <davem@davemloft.net> 20 years ago			`nop`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago
			`/* DTLB ** ICACHE line 3: winfixups+real_faults */`
			`longpath:`
			`rdpr %pstate, %g5 ! Move into alternate globals`
			`wrpr %g5, PSTATE_AG\|PSTATE_MG, %pstate`
			`rdpr %tl, %g4 ! See where we came from.`
			`cmp %g4, 1 ! Is etrap/rtrap window fault?`
			`mov TLB_TAG_ACCESS, %g4 ! Prepare for fault processing`
			`ldxa [%g4] ASI_DMMU, %g5 ! Load faulting VA page`
			`be,pt %xcc, sparc64_realfault_common ! Jump to normal fault handling`
			`mov FAULT_CODE_DTLB, %g4 ! It was read from DTLB`

			`/* DTLB ** ICACHE line 4: Unused... */`
			`ba,a,pt %xcc, winfix_trampoline ! Call window fixup code`
			`nop`
			`nop`
			`nop`
			`nop`
			`nop`
			`nop`
[SPARC64]: Fix boot failures on SunBlade-150 The sequence to move over to the Linux trap tables from the firmware ones needs to be more air tight. It turns out that to be %100 safe we do need to be able to translate OBP mappings in our TLB miss handlers early. In order not to eat up a lot of kernel image memory with static page tables, just use the translations array in the OBP TLB miss handlers. That solves the bulk of the problem. Furthermore, to make sure the OBP TLB miss path will work even before the fixed MMU globals are loaded, explicitly load %g1 to TLB_SFSR at the beginning of the i-TLB and d-TLB miss handlers. To ease the OBP TLB miss walking of the prom_trans[] array, we sort it then delete all of the non-OBP entries in there (for example, there are entries for the kernel image itself which we're not interested in at all). We also save about 32K of kernel image size with this change. Not a bad side effect :-) There are still some reasons why trampoline.S can't use the setup_trap_table() yet. The most noteworthy are: 1) OBP boots secondary processors with non-bias'd stack for some reason. This is easily fixed by using a small bootup stack in the kernel image explicitly for this purpose. 2) Doing a firmware call via the normal C call prom_set_trap_table() goes through the whole OBP enter/exit sequence that saves and restores OBP and Linux kernel state in the MMUs. This path unfortunately does a "flush %g6" while loading up the OBP locked TLB entries for the firmware call. If we setup the %g6 in the trampoline.S code properly, that is in the PAGE_OFFSET linear mapping, but we're not on the kernel trap table yet so those addresses won't translate properly. One idea is to do a by-hand firmware call like we do in the early bootup code and elsewhere here in trampoline.S But this fails as well, as aparently the secondary processors are not booted with OBP's special locked TLB entries loaded. 
These are necessary for the firwmare to processes TLB misses correctly up until the point where we take over the trap table. This does need to be resolved at some point. Signed-off-by: David S. Miller <davem@davemloft.net> 19 years ago			`nop`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 20 years ago
			`#undef CREATE_VPTE_OFFSET1`
			`#undef CREATE_VPTE_OFFSET2`