|  | /* Copyright (C) 2003-2007 Analog Devices Inc. | 
|  | * | 
|  | * This file is subject to the terms and conditions of the GNU General Public | 
|  | * License. | 
|  | */ | 
|  |  | 
|  | #define ASSEMBLY | 
|  |  | 
|  | #include <asm/linkage.h> | 
|  | #include <asm/cplb.h> | 
|  | #include <config.h> | 
|  | #include <asm/blackfin.h> | 
|  |  | 
|  | .text | 
|  |  | 
|  | /* This is an external function being called by the user | 
|  | * application through __flush_cache_all. Currently this function | 
|  | * serves the purpose of flushing all the pending writes in | 
|  | * the instruction cache. | 
|  | * | 
|  | * It reads 16 consecutive words starting at ICPLB_ADDR0 and at | 
|  | * ICPLB_DATA0 and calls _icplb_flush once for every pair, | 
|  | * unconditionally. Takes no arguments; R7:6 and P5:4 are saved | 
|  | * and restored here. | 
|  | */ | 
|  |  | 
|  | ENTRY(_flush_instruction_cache) | 
|  | [--SP] = ( R7:6, P5:4 ); | 
|  | LINK 12; | 
|  | SP += -12;		/* A further 12 bytes below the LINK frame*/ | 
|  | P5.H = (ICPLB_ADDR0 >> 16);	/* P5 walks the page addresses*/ | 
|  | P5.L = (ICPLB_ADDR0 & 0xFFFF); | 
|  | P4.H = (ICPLB_DATA0 >> 16);	/* P4 walks the CPLB data words*/ | 
|  | P4.L = (ICPLB_DATA0 & 0xFFFF); | 
|  | /* NOTE(review): R7 is loaded here but never read in this routine. */ | 
|  | R7 = CPLB_VALID | CPLB_L1_CHBL; | 
|  | R6 = 16;		/* Number of entries left to process*/ | 
|  | inext:	R0 = [P5++]; | 
|  | R1 = [P4++]; | 
|  | [--SP] =  RETS;		/* CALL overwrites RETS, so keep ours*/ | 
|  | CALL _icplb_flush;	/* R0 = page, R1 = data*/ | 
|  | RETS = [SP++]; | 
|  | /* iskip is not the target of any jump in this routine. */ | 
|  | iskip:	R6 += -1; | 
|  | CC = R6;		/* CC is set while entries remain*/ | 
|  | IF CC JUMP inext; | 
|  | SSYNC; | 
|  | SP += 12;		/* Undo the extra 12 bytes taken above*/ | 
|  | UNLINK; | 
|  | ( R7:6, P5:4 ) = [SP++]; | 
|  | RTS; | 
|  |  | 
|  | /* This is an internal function to flush (invalidate) the | 
|  | * instruction cache lines associated with a particular ICPLB. | 
|  | * | 
|  | * R0 -  page's start address | 
|  | * R1 -  CPLB's data field. | 
|  | * | 
|  | * R7:0, P5:0 and both hardware-loop register sets (LC/LT/LB 0 | 
|  | * and 1) are saved on entry and restored on exit. | 
|  | */ | 
|  |  | 
|  | .align 2 | 
|  | ENTRY(_icplb_flush) | 
|  | [--SP] = ( R7:0, P5:0 ); | 
|  | [--SP] = LC0; | 
|  | [--SP] = LT0; | 
|  | [--SP] = LB0; | 
|  | [--SP] = LC1; | 
|  | [--SP] = LT1; | 
|  | [--SP] = LB1; | 
|  |  | 
|  | /* If it's a 1K or 4K page, then it's quickest to | 
|  | * just systematically flush all the addresses in | 
|  | * the page, regardless of whether they're in the | 
|  | * cache. If it's a 1M or 4M page, there | 
|  | * are too many addresses, and we have to search the | 
|  | * cache for lines corresponding to the page. | 
|  | */ | 
|  |  | 
|  | CC = BITTST(R1, 17);	/* 1MB or 4MB */ | 
|  | IF !CC JUMP iflush_whole_page; | 
|  |  | 
|  | /* We're only interested in the page's size, so extract | 
|  | * this from the CPLB (bits 17:16), and scale to give an | 
|  | * offset into the page_prefix table. | 
|  | */ | 
|  |  | 
|  | R1 <<= 14; | 
|  | R1 >>= 30; | 
|  | R1 <<= 2; | 
|  |  | 
|  | /* We can also determine the sub-bank used, because this is | 
|  | * taken from bits 13:12 of the address. | 
|  | */ | 
|  |  | 
|  | R3 = ((12<<8)|2);		/* Extraction pattern */ | 
|  | nop;				/* Anomaly 05000209 */ | 
|  | R4 = EXTRACT(R0, R3.L) (Z);	/* Extract bits */ | 
|  |  | 
|  | /* Save in extraction pattern for later deposit. */ | 
|  | R3.H = R4.L << 0; | 
|  |  | 
|  | /* So: | 
|  | * R0 = Page start | 
|  | * R1 = Page length (actually, offset into the prefix table) | 
|  | * R2 = Way counter (initialised below) | 
|  | * R3 = sub-bank deposit values | 
|  | * | 
|  | * The loops below visit 4 Ways of 32 sets each, accessing the | 
|  | * tag for each Way of our sub-bank, looking for valid tags | 
|  | * that match our address prefix. | 
|  | * | 
|  | * NOTE(review): the sub-bank is taken from the page start | 
|  | * address only, so a single sub-bank is searched - confirm | 
|  | * whether lines of a 1M/4M page can live in other sub-banks. | 
|  | */ | 
|  |  | 
|  | P5.L = (ITEST_COMMAND & 0xFFFF); | 
|  | P5.H = (ITEST_COMMAND >> 16); | 
|  | P4.L = (ITEST_DATA0 & 0xFFFF); | 
|  | P4.H = (ITEST_DATA0 >> 16); | 
|  |  | 
|  | P0.L = page_prefix_table; | 
|  | P0.H = page_prefix_table; | 
|  | P1 = R1; | 
|  | R2 = 0;			/* Way counter*/ | 
|  | P0 = P1 + P0; | 
|  | R4 = [P0];		/* This is the address prefix*/ | 
|  |  | 
|  | /* We're reading (bit 1==0) the tag (bit 2==0), and we | 
|  | * don't care about which double-word, since we're only | 
|  | * fetching tags, so we only have to set Set, | 
|  | * Sub-bank and Way. | 
|  | */ | 
|  |  | 
|  | P2 = 4; | 
|  | LSETUP (ifs1, ife1) LC1 = P2; | 
|  | ifs1:	R5 = 0;			/* Restart the set counter for each Way*/ | 
|  | P0 = 32;		/* iterate over all sets*/ | 
|  | LSETUP (ifs0, ife0) LC0 = P0; | 
|  | ifs0:	R6 = R5 << 5;		/* Combine set*/ | 
|  | R6.H = R3.H << 0 ;	/* and sub-bank*/ | 
|  | R1 = R2 << 26;		/* and Way (way select sits at bit 26*/ | 
|  | R6 = R6 | R1;		/* upwards in ITEST_COMMAND).*/ | 
|  | [P5] = R6;		/* Issue Command*/ | 
|  | SSYNC;			/* CSYNC will not work here :(*/ | 
|  | R7 = [P4];		/* and read Tag.*/ | 
|  | CC = BITTST(R7, 0);	/* Check if valid*/ | 
|  | IF !CC JUMP ifskip;	/* and skip if not.*/ | 
|  |  | 
|  | /* Compare against the page address. First, plant bits 13:12 | 
|  | * into the tag, since those aren't part of the returned data. | 
|  | */ | 
|  |  | 
|  | R7 = DEPOSIT(R7, R3);	/* set 13:12*/ | 
|  | R1 = R7 & R4;		/* Mask off lower bits*/ | 
|  | CC = R1 == R0;		/* Compare against page start.*/ | 
|  | IF !CC JUMP ifskip;	/* Skip it if it doesn't match.*/ | 
|  |  | 
|  | /* Tag address matches against page, so this is an entry | 
|  | * we must flush. Rebuild the line's address from the tag | 
|  | * plus the index of the set we found it in; without the | 
|  | * set index the address would always name set 0. | 
|  | */ | 
|  |  | 
|  | R7 >>= 10;		/* Mask off the non-address bits*/ | 
|  | R7 <<= 10; | 
|  | R1 = R5 << 5;		/* Set index selects the line*/ | 
|  | R7 = R7 | R1; | 
|  | P3 = R7; | 
|  | IFLUSH [P3];		/* And flush the entry*/ | 
|  | ifskip: | 
|  | ife0:	R5 += 1;		/* Advance to next Set*/ | 
|  | ife1:	R2 += 1;		/* Go to next Way.*/ | 
|  |  | 
|  | ifinished: | 
|  | SSYNC;			/* Ensure the flushes have completed.*/ | 
|  |  | 
|  | /*Finished. Restore context.*/ | 
|  | LB1 = [SP++]; | 
|  | LT1 = [SP++]; | 
|  | LC1 = [SP++]; | 
|  | LB0 = [SP++]; | 
|  | LT0 = [SP++]; | 
|  | LC0 = [SP++]; | 
|  | ( R7:0, P5:0 ) = [SP++]; | 
|  | RTS; | 
|  |  | 
|  | iflush_whole_page: | 
|  | /* It's a 1K or 4K page, so quicker to just flush the | 
|  | * entire page. R1 still holds the unmodified CPLB data here. | 
|  | */ | 
|  |  | 
|  | P1 = 32;		/* For 1K pages*/ | 
|  | P2 = P1 << 2;		/* For 4K pages*/ | 
|  | P0 = R0;		/* Start of page*/ | 
|  | CC = BITTST(R1, 16);	/* Whether 1K or 4K*/ | 
|  | IF CC P1 = P2; | 
|  | P1 += -1;		/* Unroll one iteration*/ | 
|  | SSYNC; | 
|  | IFLUSH [P0++];		/* because CSYNC can't end loops.*/ | 
|  | LSETUP (isall, ieall) LC0 = P1; | 
|  | isall:IFLUSH [P0++]; | 
|  | ieall: NOP; | 
|  | SSYNC; | 
|  | JUMP ifinished; | 
|  |  | 
|  | /* This is an external function being called by the user | 
|  | * application through __flush_cache_all. Currently this function | 
|  | * serves the purpose of flushing all the pending writes in | 
|  | * the data cache. | 
|  | * | 
|  | * It reads 16 consecutive words starting at DCPLB_ADDR0 and at | 
|  | * DCPLB_DATA0 and calls _dcplb_flush for each pair that passes | 
|  | * the tests below. Takes no arguments; R7:6 and P5:4 are saved | 
|  | * and restored here, R0-R2 are overwritten. | 
|  | */ | 
|  |  | 
|  | ENTRY(_flush_data_cache) | 
|  | [--SP] = ( R7:6, P5:4 ); | 
|  | LINK 12; | 
|  | SP += -12;		/* A further 12 bytes below the LINK frame*/ | 
|  | P5.H = (DCPLB_ADDR0 >> 16);	/* P5 walks the page addresses*/ | 
|  | P5.L = (DCPLB_ADDR0 & 0xFFFF); | 
|  | P4.H = (DCPLB_DATA0 >> 16);	/* P4 walks the CPLB data words*/ | 
|  | P4.L = (DCPLB_DATA0 & 0xFFFF); | 
|  | R7 = CPLB_VALID | CPLB_L1_CHBL | CPLB_DIRTY (Z);	/* Bits tested below*/ | 
|  | R6 = 16;		/* Number of entries left to process*/ | 
|  | next:	R0 = [P5++]; | 
|  | R1 = [P4++]; | 
|  | CC = BITTST(R1, 14);	/* Is it write-through?*/ | 
|  | IF CC JUMP skip;	/* If so, ignore it.*/ | 
|  | R2 = R1 & R7;		/* Is it a dirty, cached page?*/ | 
|  | /* NOTE(review): CC = R2 is true when ANY of the three mask bits | 
|  | * is set, not only when all of them are - confirm which is meant. | 
|  | */ | 
|  | CC = R2; | 
|  | IF !CC JUMP skip;	/* If not, ignore it.*/ | 
|  | [--SP] = RETS;		/* CALL overwrites RETS, so keep ours*/ | 
|  | CALL _dcplb_flush;	/* R0 = page, R1 = data*/ | 
|  | RETS = [SP++]; | 
|  | skip:	R6 += -1; | 
|  | CC = R6;		/* CC is set while entries remain*/ | 
|  | IF CC JUMP next; | 
|  | SSYNC; | 
|  | SP += 12;		/* Undo the extra 12 bytes taken above*/ | 
|  | UNLINK; | 
|  | ( R7:6, P5:4 ) = [SP++]; | 
|  | RTS; | 
|  |  | 
|  | /* This is an internal function to flush all pending | 
|  | * writes in the cache associated with a particular DCPLB. | 
|  | * | 
|  | * R0 -  page's start address | 
|  | * R1 -  CPLB's data field. | 
|  | * | 
|  | * R7:0, P5:0 and both hardware-loop register sets (LC/LT/LB 0 | 
|  | * and 1) are saved on entry and restored on exit. | 
|  | */ | 
|  |  | 
|  | .align 2 | 
|  | ENTRY(_dcplb_flush) | 
|  | [--SP] = ( R7:0, P5:0 ); | 
|  | [--SP] = LC0; | 
|  | [--SP] = LT0; | 
|  | [--SP] = LB0; | 
|  | [--SP] = LC1; | 
|  | [--SP] = LT1; | 
|  | [--SP] = LB1; | 
|  |  | 
|  | /* If it's a 1K or 4K page, then it's quickest to | 
|  | * just systematically flush all the addresses in | 
|  | * the page, regardless of whether they're in the | 
|  | * cache, or dirty. If it's a 1M or 4M page, there | 
|  | * are too many addresses, and we have to search the | 
|  | * cache for lines corresponding to the page. | 
|  | */ | 
|  |  | 
|  | CC = BITTST(R1, 17);	/* 1MB or 4MB */ | 
|  | IF !CC JUMP dflush_whole_page; | 
|  |  | 
|  | /* We're only interested in the page's size, so extract | 
|  | * this from the CPLB (bits 17:16), and scale to give an | 
|  | * offset into the page_prefix table. | 
|  | */ | 
|  |  | 
|  | R1 <<= 14; | 
|  | R1 >>= 30; | 
|  | R1 <<= 2; | 
|  |  | 
|  | /* The page could be mapped into Bank A or Bank B, depending | 
|  | * on (a) whether both banks are configured as cache, and | 
|  | * (b) on whether address bit A[x] is set. x is determined | 
|  | * by DCBS in DMEM_CONTROL | 
|  | */ | 
|  |  | 
|  | R2 = 0;			/* Default to Bank A (Bank B would be 1)*/ | 
|  |  | 
|  | P0.L = (DMEM_CONTROL & 0xFFFF); | 
|  | P0.H = (DMEM_CONTROL >> 16); | 
|  |  | 
|  | /* Bit 2 is the low bit of the DMC field; it is set only when | 
|  | * Bank B is configured as cache as well. When it is clear, | 
|  | * Bank B is not cache and Bank A is the only candidate, so the | 
|  | * bank-select computation must be skipped in that case. | 
|  | */ | 
|  |  | 
|  | R3 = [P0];		/* If Bank B is not enabled as cache*/ | 
|  | CC = BITTST(R3, 2);	/* then Bank A is our only option.*/ | 
|  | IF !CC JUMP bank_chosen; | 
|  |  | 
|  | R4 = 1<<14;		/* If DCBS==0, use A[14].*/ | 
|  | R5 = R4 << 7;		/* If DCBS==1, use A[23];*/ | 
|  | CC = BITTST(R3, 4); | 
|  | IF CC R4 = R5;		/* R4 now has either bit 14 or bit 23 set.*/ | 
|  | R5 = R0 & R4;		/* Use it to test the Page address*/ | 
|  | CC = R5;		/* and if that bit is set, we use Bank B,*/ | 
|  | R2 = CC;		/* else we use Bank A.*/ | 
|  | R2 <<= 23;		/* The Bank selection's at posn 23.*/ | 
|  |  | 
|  | bank_chosen: | 
|  |  | 
|  | /* We can also determine the sub-bank used, because this is | 
|  | * taken from bits 13:12 of the address. | 
|  | */ | 
|  |  | 
|  | R3 = ((12<<8)|2);		/* Extraction pattern */ | 
|  | nop;				/* Anomaly 05000209 */ | 
|  | R4 = EXTRACT(R0, R3.L) (Z);	/* Extract bits*/ | 
|  | /* Save in extraction pattern for later deposit.*/ | 
|  | R3.H = R4.L << 0; | 
|  |  | 
|  | /* So: | 
|  | * R0 = Page start | 
|  | * R1 = Page length (actually, offset into the prefix table) | 
|  | * R2 = Bank select mask | 
|  | * R3 = sub-bank deposit values | 
|  | * | 
|  | * The cache has 2 Ways, and 64 sets, so we iterate through | 
|  | * the sets, accessing the tag for each Way, for our Bank and | 
|  | * sub-bank, looking for dirty, valid tags that match our | 
|  | * address prefix. | 
|  | * | 
|  | * NOTE(review): bank and sub-bank are taken from the page | 
|  | * start address only, so a single bank/sub-bank is searched - | 
|  | * confirm whether lines of a 1M/4M page can live elsewhere. | 
|  | */ | 
|  |  | 
|  | P5.L = (DTEST_COMMAND & 0xFFFF); | 
|  | P5.H = (DTEST_COMMAND >> 16); | 
|  | P4.L = (DTEST_DATA0 & 0xFFFF); | 
|  | P4.H = (DTEST_DATA0 >> 16); | 
|  |  | 
|  | P0.L = page_prefix_table; | 
|  | P0.H = page_prefix_table; | 
|  | P1 = R1; | 
|  | P0 = P1 + P0; | 
|  | R4 = [P0];		/* This is the address prefix*/ | 
|  |  | 
|  |  | 
|  | /* We're reading (bit 1==0) the tag (bit 2==0), and we | 
|  | * don't care about which double-word, since we're only | 
|  | * fetching tags, so we only have to set Set, Bank, | 
|  | * Sub-bank and Way. | 
|  | */ | 
|  |  | 
|  | P2 = 2; | 
|  | LSETUP (fs1, fe1) LC1 = P2; | 
|  | fs1:	R5 = 0;			/* Restart the set counter for each Way*/ | 
|  | P0 = 64;		/* iterate over all sets*/ | 
|  | LSETUP (fs0, fe0) LC0 = P0; | 
|  | fs0:	R6 = R5 << 5;		/* Combine set*/ | 
|  | R6.H = R3.H << 0 ;	/* and sub-bank*/ | 
|  | R6 = R6 | R2;		/* and Bank. Leave Way==0 at first.*/ | 
|  | BITSET(R6,14); | 
|  | [P5] = R6;		/* Issue Command*/ | 
|  | SSYNC; | 
|  | R7 = [P4];		/* and read Tag.*/ | 
|  | CC = BITTST(R7, 0);	/* Check if valid*/ | 
|  | IF !CC JUMP fskip;	/* and skip if not.*/ | 
|  | CC = BITTST(R7, 1);	/* Check if dirty*/ | 
|  | IF !CC JUMP fskip;	/* and skip if not.*/ | 
|  |  | 
|  | /* Compare against the page address. First, plant bits 13:12 | 
|  | * into the tag, since those aren't part of the returned data. | 
|  | */ | 
|  |  | 
|  | R7 = DEPOSIT(R7, R3);	/* set 13:12*/ | 
|  | R1 = R7 & R4;		/* Mask off lower bits*/ | 
|  | CC = R1 == R0;		/* Compare against page start.*/ | 
|  | IF !CC JUMP fskip;	/* Skip it if it doesn't match.*/ | 
|  |  | 
|  | /* Tag address matches against page, so this is an entry | 
|  | * we must flush. Rebuild the line's address from the tag | 
|  | * plus the index of the set we found it in; without the | 
|  | * set index the address would always name set 0. | 
|  | */ | 
|  |  | 
|  | R7 >>= 10;		/* Mask off the non-address bits*/ | 
|  | R7 <<= 10; | 
|  | R1 = R5 << 5;		/* Set index selects the line*/ | 
|  | R7 = R7 | R1; | 
|  | P3 = R7; | 
|  | SSYNC; | 
|  | FLUSHINV [P3];		/* And flush the entry*/ | 
|  | fskip: | 
|  | fe0:	R5 += 1;		/* Advance to next Set*/ | 
|  | fe1:	BITSET(R2, 26);		/* Go to next Way.*/ | 
|  |  | 
|  | dfinished: | 
|  | SSYNC;			/* Ensure the data gets out to mem.*/ | 
|  |  | 
|  | /*Finished. Restore context.*/ | 
|  | LB1 = [SP++]; | 
|  | LT1 = [SP++]; | 
|  | LC1 = [SP++]; | 
|  | LB0 = [SP++]; | 
|  | LT0 = [SP++]; | 
|  | LC0 = [SP++]; | 
|  | ( R7:0, P5:0 ) = [SP++]; | 
|  | RTS; | 
|  |  | 
|  | dflush_whole_page: | 
|  |  | 
|  | /* It's a 1K or 4K page, so quicker to just flush the | 
|  | * entire page. R1 still holds the unmodified CPLB data here. | 
|  | */ | 
|  |  | 
|  | P1 = 32;		/* For 1K pages*/ | 
|  | P2 = P1 << 2;		/* For 4K pages*/ | 
|  | P0 = R0;		/* Start of page*/ | 
|  | CC = BITTST(R1, 16);	/* Whether 1K or 4K*/ | 
|  | IF CC P1 = P2; | 
|  | P1 += -1;		/* Unroll one iteration*/ | 
|  | SSYNC; | 
|  | FLUSHINV [P0++];	/* because CSYNC can't end loops.*/ | 
|  | LSETUP (eall, eall) LC0 = P1; | 
|  | eall:	FLUSHINV [P0++]; | 
|  | SSYNC; | 
|  | JUMP dfinished; | 
|  |  | 
|  | /* Address-prefix masks, one 32-bit word per CPLB page size. | 
|  | * _icplb_flush and _dcplb_flush index this table with the CPLB | 
|  | * page-size field (data bits 17:16) scaled by 4, and AND the | 
|  | * selected mask with a cache tag address before comparing it | 
|  | * against the page start. | 
|  | */ | 
|  | .align 4; | 
|  | page_prefix_table: | 
|  | .byte4 	0xFFFFFC00;	/* 1K */ | 
|  | .byte4	0xFFFFF000;	/* 4K */ | 
|  | .byte4	0xFFF00000;	/* 1M */ | 
|  | .byte4	0xFFC00000;	/* 4M */ | 
|  | .page_prefix_table.end: | 