Merge branch 'master' of git://git.denx.de/u-boot-usb
diff --git a/blackfin_config.mk b/blackfin_config.mk
index 7bde449..f3fcd7a 100644
--- a/blackfin_config.mk
+++ b/blackfin_config.mk
@@ -24,9 +24,12 @@
 CONFIG_BFIN_CPU := $(strip $(subst ",,$(CONFIG_BFIN_CPU)))
 CONFIG_BFIN_BOOT_MODE := $(strip $(subst ",,$(CONFIG_BFIN_BOOT_MODE)))
 
-PLATFORM_RELFLAGS += -ffixed-P5 -fomit-frame-pointer
+PLATFORM_RELFLAGS += -ffixed-P5 -fomit-frame-pointer -mno-fdpic
 PLATFORM_CPPFLAGS += -DCONFIG_BLACKFIN
 
+LDFLAGS += --gc-sections
+PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections
+
 ifneq (,$(CONFIG_BFIN_CPU))
 PLATFORM_RELFLAGS += -mcpu=$(CONFIG_BFIN_CPU)
 endif
diff --git a/board/bf537-stamp/spi_flash.c b/board/bf537-stamp/spi_flash.c
index 99caa96..b147ce7 100644
--- a/board/bf537-stamp/spi_flash.c
+++ b/board/bf537-stamp/spi_flash.c
@@ -3,7 +3,7 @@
  *
  * Enter bugs at http://blackfin.uclinux.org/
  *
- * Copyright (c) 2005-2007 Analog Devices Inc.
+ * Copyright (c) 2005-2008 Analog Devices Inc.
  *
  * Licensed under the GPL-2 or later.
  */
@@ -163,7 +163,9 @@
 
 #define	TIMEOUT	5000	/* timeout of 5 seconds */
 
-/* BF54x support */
+/* If part has multiple SPI flashes, assume SPI0 as that is
+ * the one we can boot off of ...
+ */
 #ifndef pSPI_CTL
 # define pSPI_CTL  pSPI0_CTL
 # define pSPI_BAUD pSPI0_BAUD
@@ -171,19 +173,16 @@
 # define pSPI_RDBR pSPI0_RDBR
 # define pSPI_STAT pSPI0_STAT
 # define pSPI_TDBR pSPI0_TDBR
-# define SPI0_SCK	0x0001
-# define SPI0_MOSI	0x0004
-# define SPI0_MISO	0x0002
-# define SPI0_SEL1	0x0010
 #endif
 
 /* Default to the SPI SSEL that we boot off of:
  *	BF54x, BF537, (everything new?): SSEL1
- *	BF533, BF561: SSEL2
+ *	BF51x, BF533, BF561: SSEL2
  */
 #ifndef CONFIG_SPI_FLASH_SSEL
 # if defined(__ADSPBF531__) || defined(__ADSPBF532__) || defined(__ADSPBF533__) || \
-     defined(__ADSPBF538__) || defined(__ADSPBF539__) || defined(__ADSPBF561__)
+     defined(__ADSPBF538__) || defined(__ADSPBF539__) || defined(__ADSPBF561__) || \
+     defined(__ADSPBF51x__)
 #  define CONFIG_SPI_FLASH_SSEL 2
 # else
 #  define CONFIG_SPI_FLASH_SSEL 1
@@ -200,12 +199,15 @@
 
 	/* enable SPI pins: SSEL, MOSI, MISO, SCK */
 #ifdef __ADSPBF54x__
-	*pPORTE_FER |= (SPI0_SCK | SPI0_MOSI | SPI0_MISO | SPI0_SEL1);
+	*pPORTE_FER |= (PE0 | PE1 | PE2 | PE4);
 #elif defined(__ADSPBF534__) || defined(__ADSPBF536__) || defined(__ADSPBF537__)
 	*pPORTF_FER |= (PF10 | PF11 | PF12 | PF13);
 #elif defined(__ADSPBF52x__)
 	bfin_write_PORTG_MUX((bfin_read_PORTG_MUX() & ~PORT_x_MUX_0_MASK) | PORT_x_MUX_0_FUNC_3);
 	bfin_write_PORTG_FER(bfin_read_PORTG_FER() | PG1 | PG2 | PG3 | PG4);
+#elif defined(__ADSPBF51x__)
+	bfin_write_PORTG_MUX((bfin_read_PORTG_MUX() & ~PORT_x_MUX_7_MASK) | PORT_x_MUX_7_FUNC_1);
+	bfin_write_PORTG_FER(bfin_read_PORTG_FER() | PG12 | PG13 | PG14 | PG15);
 #endif
 
 	/* initate communication upon write of TDBR */
diff --git a/cpu/blackfin/Makefile b/cpu/blackfin/Makefile
index b4049ff..1378fd1 100644
--- a/cpu/blackfin/Makefile
+++ b/cpu/blackfin/Makefile
@@ -47,6 +47,7 @@
 
 # make sure our initcode (which goes into LDR) does not
 # have relocs or external references
+$(obj)initcode.o: CFLAGS += -fno-function-sections -fno-data-sections
 READINIT = env LC_ALL=C $(CROSS_COMPILE)readelf -s $<
 check_initcode: $(obj)initcode.o
 ifneq ($(CONFIG_BFIN_BOOT_MODE),BFIN_BOOT_BYPASS)
diff --git a/cpu/blackfin/cache.S b/cpu/blackfin/cache.S
index 9facadf..6ed655a 100644
--- a/cpu/blackfin/cache.S
+++ b/cpu/blackfin/cache.S
@@ -1,5 +1,10 @@
-/* cache.S - low level cache handling routines
- * Copyright (C) 2003-2007 Analog Devices Inc.
+/*
+ * Blackfin cache control code
+ *
+ * Copyright 2003-2008 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
  * Licensed under the GPL-2 or later.
  */
 
@@ -8,54 +13,75 @@
 #include <asm/blackfin.h>
 
 .text
-.align 2
-ENTRY(_blackfin_icache_flush_range)
-	R2 = -32;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC;
+/* Since all L1 caches work the same way, we use the same method for flushing
+ * them.  Only the actual flush instruction differs.  We write this in asm as
+ * GCC can be hard to coax into writing nice hardware loops.
+ *
+ * Also, we assume the following register setup:
+ * R0 = start address
+ * R1 = end address
+ */
+.macro do_flush flushins:req optflushins optnopins label
+
+	R2 = -L1_CACHE_BYTES;
+
+	/* start = (start & -L1_CACHE_BYTES) */
+	R0 = R0 & R2;
+
+	/* end = ((end - 1) & -L1_CACHE_BYTES) + L1_CACHE_BYTES; */
+	R1 += -1;
+	R1 = R1 & R2;
+	R1 += L1_CACHE_BYTES;
+
+	/* count = (end - start) >> L1_CACHE_SHIFT */
+	R2 = R1 - R0;
+	R2 >>= L1_CACHE_SHIFT;
+	P1 = R2;
+
+.ifnb \label
+\label :
+.endif
+	P0 = R0;
+	LSETUP (1f, 2f) LC1 = P1;
 1:
-	IFLUSH[P0++];
-	CC = P0 < P1(iu);
-	IF CC JUMP 1b(bp);
-	IFLUSH[P0];
-	SSYNC;
+.ifnb \optflushins
+	\optflushins [P0];
+.endif
+#if ANOMALY_05000443
+.ifb \optnopins
+2:
+.endif
+	\flushins [P0++];
+.ifnb \optnopins
+2:	\optnopins;
+.endif
+#else
+2:	\flushins [P0++];
+#endif
+
 	RTS;
+.endm
+
+/* Invalidate all instruction cache lines associated with this memory area */
+ENTRY(_blackfin_icache_flush_range)
+	do_flush IFLUSH, , nop
 ENDPROC(_blackfin_icache_flush_range)
 
-ENTRY(_blackfin_dcache_flush_range)
-	R2 = -32;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC;
-1:
-	FLUSH[P0++];
-	CC = P0 < P1(iu);
-	IF CC JUMP 1b(bp);
-	FLUSH[P0];
-	SSYNC;
-	RTS;
-ENDPROC(_blackfin_dcache_flush_range)
+/* Flush all cache lines associated with this area of memory. */
+ENTRY(_blackfin_icache_dcache_flush_range)
+	do_flush FLUSH, IFLUSH
+ENDPROC(_blackfin_icache_dcache_flush_range)
 
+/* Throw away all D-cached data in specified region without any obligation to
+ * write them back.  Since the Blackfin ISA does not have an "invalidate"
+ * instruction, we use flush/invalidate.  Perhaps as a speed optimization we
+ * could bang on the DTEST MMRs ...
+ */
 ENTRY(_blackfin_dcache_flush_invalidate_range)
-	R2 = -32;
-	R2 = R0 & R2;
-	P0 = R2;
-	P1 = R1;
-	CSYNC;
-1:
-	FLUSHINV[P0++];
-	CC = P0 < P1(iu);
-	IF CC JUMP 1b(bp);
-
-	/*
-	 * If the data crosses a cache line, then we'll be pointing to
-	 * the last cache line, but won't have flushed/invalidated it yet, so do
-	 * one more.
-	 */
-	FLUSHINV[P0];
-	SSYNC;
-	RTS;
+	do_flush FLUSHINV
 ENDPROC(_blackfin_dcache_flush_invalidate_range)
+
+/* Flush all data cache lines associated with this memory area */
+ENTRY(_blackfin_dcache_flush_range)
+	do_flush FLUSH, , , .Ldfr
+ENDPROC(_blackfin_dcache_flush_range)
diff --git a/cpu/blackfin/initcode.c b/cpu/blackfin/initcode.c
index e733dd2..6091f8c 100644
--- a/cpu/blackfin/initcode.c
+++ b/cpu/blackfin/initcode.c
@@ -61,7 +61,11 @@
 	}
 #endif
 
-	uint32_t old_baud = serial_early_get_baud();
+	uint32_t old_baud;
+	if (BFIN_DEBUG_EARLY_SERIAL || CONFIG_BFIN_BOOT_MODE == BFIN_BOOT_UART)
+		old_baud = serial_early_get_baud();
+	else
+		old_baud = CONFIG_BAUDRATE;
 
 	if (BFIN_DEBUG_EARLY_SERIAL) {
 		serial_early_init();
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 642582b..59388d9 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -29,6 +29,7 @@
 COBJS-$(CONFIG_ATA_PIIX) += ata_piix.o
 COBJS-$(CONFIG_FSL_SATA) += fsl_sata.o
 COBJS-$(CONFIG_LIBATA) += libata.o
+COBJS-$(CONFIG_PATA_BFIN) += pata_bfin.o
 COBJS-$(CONFIG_SATA_SIL3114) += sata_sil3114.o
 COBJS-$(CONFIG_IDE_SIL680) += sil680.o
 COBJS-$(CONFIG_SCSI_SYM53C8XX) += sym53c8xx.o
diff --git a/drivers/block/pata_bfin.c b/drivers/block/pata_bfin.c
new file mode 100644
index 0000000..f16dabe
--- /dev/null
+++ b/drivers/block/pata_bfin.c
@@ -0,0 +1,1201 @@
+/*
+ * Driver for Blackfin on-chip ATAPI controller.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (c) 2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <common.h>
+#include <command.h>
+#include <config.h>
+#include <asm/byteorder.h>
+#include <asm/io.h>
+#include <asm/errno.h>
+#include <asm/mach-common/bits/pata.h>
+#include <ata.h>
+#include <libata.h>
+#include "pata_bfin.h"
+
+static struct ata_port port[CONFIG_SYS_SATA_MAX_DEVICE];
+
+/**
+ * PIO Mode - Frequency compatibility
+ */
+/* mode: 0         1         2         3         4 */
+static const u32 pio_fsclk[] =
+{ 33333333, 33333333, 33333333, 33333333, 33333333 };
+
+/**
+ * MDMA Mode - Frequency compatibility
+ */
+/*               mode:      0         1         2        */
+static const u32 mdma_fsclk[] = { 33333333, 33333333, 33333333 };
+
+/**
+ * UDMA Mode - Frequency compatibility
+ *
+ * UDMA5 - 100 MB/s   - SCLK  = 133 MHz
+ * UDMA4 - 66 MB/s    - SCLK >=  80 MHz
+ * UDMA3 - 44.4 MB/s  - SCLK >=  50 MHz
+ * UDMA2 - 33 MB/s    - SCLK >=  40 MHz
+ */
+/* mode: 0         1         2         3         4          5 */
+static const u32 udma_fsclk[] =
+{ 33333333, 33333333, 40000000, 50000000, 80000000, 133333333 };
+
+/**
+ * Register transfer timing table
+ */
+/*               mode:       0    1    2    3    4    */
+/* Cycle Time                     */
+static const u32 reg_t0min[]   = { 600, 383, 330, 180, 120 };
+/* DIOR/DIOW to end cycle         */
+static const u32 reg_t2min[]   = { 290, 290, 290, 70,  25  };
+/* DIOR/DIOW asserted pulse width */
+static const u32 reg_teocmin[] = { 290, 290, 290, 80,  70  };
+
+/**
+ * PIO timing table
+ */
+/*               mode:       0    1    2    3    4    */
+/* Cycle Time                     */
+static const u32 pio_t0min[]   = { 600, 383, 240, 180, 120 };
+/* Address valid to DIOR/DIOW     */
+static const u32 pio_t1min[]   = { 70,  50,  30,  30,  25  };
+/* DIOR/DIOW to end cycle         */
+static const u32 pio_t2min[]   = { 165, 125, 100, 80,  70  };
+/* DIOR/DIOW asserted pulse width */
+static const u32 pio_teocmin[] = { 165, 125, 100, 70,  25  };
+/* DIOW data hold                 */
+static const u32 pio_t4min[]   = { 30,  20,  15,  10,  10  };
+
+/* ******************************************************************
+ * Multiword DMA timing table
+ * ******************************************************************
+ */
+/*               mode:       0   1    2        */
+/* Cycle Time                     */
+static const u32 mdma_t0min[]  = { 480, 150, 120 };
+/* DIOR/DIOW asserted pulse width */
+static const u32 mdma_tdmin[]  = { 215, 80,  70  };
+/* DIOW data hold                 */
+static const u32 mdma_thmin[]  = { 20,  15,  10  };
+/* DIOR/DIOW to DMACK hold        */
+static const u32 mdma_tjmin[]  = { 20,  5,   5   };
+/* DIOR negated pulse width       */
+static const u32 mdma_tkrmin[] = { 50,  50,  25  };
+/* DIOW negated pulse width       */
+static const u32 mdma_tkwmin[] = { 215, 50,  25  };
+/* CS[1:0] valid to DIOR/DIOW     */
+static const u32 mdma_tmmin[]  = { 50,  30,  25  };
+/* DMACK to read data released    */
+static const u32 mdma_tzmax[]  = { 20,  25,  25  };
+
+/**
+ * Ultra DMA timing table
+ */
+/*               mode:         0    1    2    3    4    5       */
+static const u32 udma_tcycmin[]  = { 112, 73,  54,  39,  25,  17 };
+static const u32 udma_tdvsmin[]  = { 70,  48,  31,  20,  7,   5  };
+static const u32 udma_tenvmax[]  = { 70,  70,  70,  55,  55,  50 };
+static const u32 udma_trpmin[]   = { 160, 125, 100, 100, 100, 85 };
+static const u32 udma_tmin[]     = { 5,   5,   5,   5,   3,   3  };
+
+
+static const u32 udma_tmlimin = 20;
+static const u32 udma_tzahmin = 20;
+static const u32 udma_tenvmin = 20;
+static const u32 udma_tackmin = 20;
+static const u32 udma_tssmin = 50;
+
+static void msleep(int count)
+{
+	int i;
+	/* one udelay(1000) call per requested millisecond; count <= 0 is a no-op */
+	for (i = 0; i < count; i++)
+		udelay(1000);
+}
+
+/**
+ *	num_clocks_min - SCLK cycles needed to cover a minimum time
+ *	@tmin: minimum time to cover (timing-table value, presumably in ns)
+ *	@fsclk: SCLK frequency as returned by get_sclk() (presumably in Hz)
+ *
+ *	Returns tmin * fsclk / 10^9 rounded up to a whole number of cycles.
+ */
+static unsigned short num_clocks_min(unsigned long tmin,
+				unsigned long fsclk)
+{
+	/* Work in kHz: the product stays small for the table values
+	 * (<= 600) and the fractional MHz is kept.  The former
+	 * fsclk/1000/1000 truncation could come out one cycle short,
+	 * e.g. 383 at 133333333 gave 51 where 52 cycles are needed.
+	 */
+	unsigned long khz = fsclk / 1000;
+
+	/* ceil(tmin * khz / 10^6) */
+	return (unsigned short)((tmin * khz + 999999) / 1000000);
+}
+
+/**
+ *	bfin_set_piomode - Initialize host controller PATA PIO timings
+ *	@ap: Port whose timings we are configuring
+ *	@pio_mode: transfer mode, XFER_PIO_0 based (PIO modes 0-4 are handled)
+ *
+ *	Set PIO mode for device.  Any other mode value is silently ignored.
+ *
+ *	LOCKING:
+ *	None (inherited from caller).
+ */
+
+static void bfin_set_piomode(struct ata_port *ap, int pio_mode)
+{
+	int mode = pio_mode - XFER_PIO_0;
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	unsigned int fsclk = get_sclk();
+	unsigned short teoc_reg, t2_reg, teoc_pio;
+	unsigned short t4_reg, t2_pio, t1_reg;
+	unsigned short n0, n6, t6min = 5;
+
+	/* the most restrictive timing value is t6 and tc, the DIOW - data hold
+	* If one SCLK pulse is longer than this minimum value then register
+	* transfers cannot be supported at this frequency.
+	*/
+	n6 = num_clocks_min(t6min, fsclk);
+	if (mode >= 0 && mode <= 4 && n6 >= 1) {
+		debug("set piomode: mode=%d, fsclk=%u\n", mode, fsclk);
+		/* step down to the fastest mode the current SCLK allows */
+		while (mode > 0 && pio_fsclk[mode] > fsclk)
+			mode--;
+
+		/* DIOR/DIOW to end cycle time */
+		t2_reg = num_clocks_min(reg_t2min[mode], fsclk);
+		/* DIOR/DIOW asserted pulse width */
+		teoc_reg = num_clocks_min(reg_teocmin[mode], fsclk);
+		/* Cycle Time */
+		n0  = num_clocks_min(reg_t0min[mode], fsclk);
+
+		/* increase t2 until we meet the minimum cycle length */
+		if (t2_reg + teoc_reg < n0)
+			t2_reg = n0 - teoc_reg;
+
+		/* calculate the timing values for pio transfers. */
+
+		/* DIOR/DIOW to end cycle time */
+		t2_pio = num_clocks_min(pio_t2min[mode], fsclk);
+		/* DIOR/DIOW asserted pulse width */
+		teoc_pio = num_clocks_min(pio_teocmin[mode], fsclk);
+		/* Cycle Time */
+		n0  = num_clocks_min(pio_t0min[mode], fsclk);
+
+		/* increase t2 until we meet the minimum cycle length */
+		if (t2_pio + teoc_pio < n0)
+			t2_pio = n0 - teoc_pio;
+
+		/* Address valid to DIOR/DIOW */
+		t1_reg = num_clocks_min(pio_t1min[mode], fsclk);
+
+		/* DIOW data hold */
+		t4_reg = num_clocks_min(pio_t4min[mode], fsclk);
+
+		ATAPI_SET_REG_TIM_0(base, (teoc_reg<<8 | t2_reg));
+		ATAPI_SET_PIO_TIM_0(base, (t4_reg<<12 | t2_pio<<4 | t1_reg));
+		ATAPI_SET_PIO_TIM_1(base, teoc_pio);
+		if (mode > 2) {
+			ATAPI_SET_CONTROL(base,
+				ATAPI_GET_CONTROL(base) | IORDY_EN);
+		} else {
+			ATAPI_SET_CONTROL(base,
+				ATAPI_GET_CONTROL(base) & ~IORDY_EN);
+		}
+
+		/* Disable host ATAPI PIO interrupts */
+		ATAPI_SET_INT_MASK(base, ATAPI_GET_INT_MASK(base)
+			& ~(PIO_DONE_MASK | HOST_TERM_XFER_MASK));
+		SSYNC();
+	}
+}
+
+/**
+ *	wait_complete - poll the interrupt status for a set of bits
+ *	@base: ATAPI controller register base
+ *	@mask: interrupt status bits to wait for
+ *
+ *	Gives up silently after PATA_BFIN_WAIT_TIMEOUT polls.
+ */
+static inline void wait_complete(void __iomem *base, unsigned short mask)
+{
+	unsigned short status;
+	unsigned int i = 0;
+
+	for (i = 0; i < PATA_BFIN_WAIT_TIMEOUT; i++) {
+		status = ATAPI_GET_INT_STATUS(base) & mask;
+		if (status)
+			break;
+	}
+	/* written back whether or not a bit was seen (presumably acks it) */
+	ATAPI_SET_INT_STATUS(base, mask);
+}
+
+/**
+ *	write_atapi_register - write one ATA device register
+ *	@base: ATAPI controller register base
+ *	@ata_reg: address of the device register to write
+ *	@value: value to write
+ *	Completion is polled with wait_complete(); a timeout is not reported.
+ */
+
+static void write_atapi_register(void __iomem *base,
+		unsigned long ata_reg, unsigned short value)
+{
+	/* Program the ATA_DEV_TXBUF register with write data (to be
+	 * written into the device).
+	 */
+	ATAPI_SET_DEV_TXBUF(base, value);
+
+	/* Program the ATA_DEV_ADDR register with address of the
+	 * device register (0x01 to 0x0F).
+	 */
+	ATAPI_SET_DEV_ADDR(base, ata_reg);
+
+	/* Program the ATA_CTRL register with dir set to write (1)
+	 */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) | XFER_DIR));
+
+	/* ensure PIO DMA is not set */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) & ~PIO_USE_DMA));
+
+	/* and start the transfer */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) | PIO_START));
+
+	/* Wait for the interrupt to indicate the end of the transfer.
+	 * (wait_complete() polls for PIO_DONE_INT and then writes it back)
+	 */
+	wait_complete(base, PIO_DONE_INT);
+}
+
+/**
+ *	read_atapi_register - read one ATA device register
+ *	@base: ATAPI controller register base
+ *	@ata_reg: address of the device register to read
+ *
+ *	Returns the ATA_DEV_RXBUF content; a poll timeout is not reported.
+ */
+
+static unsigned short read_atapi_register(void __iomem *base,
+		unsigned long ata_reg)
+{
+	/* Program the ATA_DEV_ADDR register with address of the
+	 * device register (0x01 to 0x0F).
+	 */
+	ATAPI_SET_DEV_ADDR(base, ata_reg);
+
+	/* Program the ATA_CTRL register with dir set to read (0)
+	 */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) & ~XFER_DIR));
+
+	/* ensure PIO DMA is not set */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) & ~PIO_USE_DMA));
+
+	/* and start the transfer */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) | PIO_START));
+
+	/* Wait for the interrupt to indicate the end of the transfer.
+	 * (wait_complete() polls for PIO_DONE_INT and then writes it
+	 * back to the interrupt status register)
+	 */
+	wait_complete(base, PIO_DONE_INT);
+
+	/* Fetch the value the transfer left in the ATA_DEV_RXBUF
+	 * register (the data read from the device).
+	 */
+	return ATAPI_GET_DEV_RXBUF(base);
+}
+
+/**
+ *	write_atapi_data - write words to the ATA data register
+ *	@base: ATAPI controller register base
+ *	@len: number of 16-bit words to write
+ *	@buf: words to write
+ *	Each word is a separate PIO transfer polled with wait_complete().
+ */
+
+static void write_atapi_data(void __iomem *base,
+		int len, unsigned short *buf)
+{
+	int i;
+
+	/* Set transfer length to 1 */
+	ATAPI_SET_XFER_LEN(base, 1);
+
+	/* Program the ATA_DEV_ADDR register with address of the
+	 * ATA_REG_DATA
+	 */
+	ATAPI_SET_DEV_ADDR(base, ATA_REG_DATA);
+
+	/* Program the ATA_CTRL register with dir set to write (1)
+	 */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) | XFER_DIR));
+
+	/* ensure PIO DMA is not set */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) & ~PIO_USE_DMA));
+
+	for (i = 0; i < len; i++) {
+		/* Program the ATA_DEV_TXBUF register with write data (to be
+		 * written into the device).
+		 */
+		ATAPI_SET_DEV_TXBUF(base, buf[i]);
+
+		/* and start the transfer */
+		ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) | PIO_START));
+
+		/* Wait for the interrupt to indicate the end of the transfer.
+		 * (wait_complete() polls for PIO_DONE_INT and then writes
+		 * it back to the interrupt status register)
+		 */
+		wait_complete(base, PIO_DONE_INT);
+	}
+}
+
+/**
+ *	read_atapi_data - read words from the ATA data register
+ *	@base: ATAPI controller register base
+ *	@len: number of 16-bit words to read
+ *	@buf: destination for the words read
+ *	Each word is a separate PIO transfer polled with wait_complete().
+ */
+
+static void read_atapi_data(void __iomem *base,
+		int len, unsigned short *buf)
+{
+	int i;
+
+	/* Set transfer length to 1 */
+	ATAPI_SET_XFER_LEN(base, 1);
+
+	/* Program the ATA_DEV_ADDR register with address of the
+	 * ATA_REG_DATA
+	 */
+	ATAPI_SET_DEV_ADDR(base, ATA_REG_DATA);
+
+	/* Program the ATA_CTRL register with dir set to read (0)
+	 */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) & ~XFER_DIR));
+
+	/* ensure PIO DMA is not set */
+	ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) & ~PIO_USE_DMA));
+
+	for (i = 0; i < len; i++) {
+		/* and start the transfer */
+		ATAPI_SET_CONTROL(base, (ATAPI_GET_CONTROL(base) | PIO_START));
+
+		/* Wait for the interrupt to indicate the end of the transfer.
+		 * (wait_complete() polls for PIO_DONE_INT and then writes
+		 * it back to the interrupt status register)
+		 */
+		wait_complete(base, PIO_DONE_INT);
+
+		/* Fetch the word the transfer left in the ATA_DEV_RXBUF
+		 * register (the data read from the device).
+		 */
+		buf[i] = ATAPI_GET_DEV_RXBUF(base);
+	}
+}
+
+/**
+ *	bfin_check_status - Read device status reg & clear interrupt
+ *	@ap: port where the device is
+ *	Returns the ATA_REG_STATUS value, narrowed to u8.
+ *	Note: Original code is ata_check_status().
+ */
+
+static u8 bfin_check_status(struct ata_port *ap)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	return read_atapi_register(base, ATA_REG_STATUS);
+}
+
+/**
+ *	bfin_check_altstatus - Read device alternate status reg
+ *	@ap: port where the device is
+ *	Returns the ATA_REG_ALTSTATUS value, narrowed to u8.
+ */
+static u8 bfin_check_altstatus(struct ata_port *ap)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	return read_atapi_register(base, ATA_REG_ALTSTATUS);
+}
+
+/**
+ *      bfin_ata_busy_wait - Wait for a port status register
+ *      @ap: Port to wait for.
+ *      @bits: bits that must be clear
+ *      @max: number of 10uS waits to perform
+ *      @usealtstatus: non-zero polls the alternate status register
+ *
+ *      Waits up to max*10 microseconds (plus register access time) for the
+ *      selected bits to clear; stops early on 0xff.  Returns final status.
+ *
+ *      LOCKING:
+ *      Inherited from caller.
+ */
+static inline u8 bfin_ata_busy_wait(struct ata_port *ap, unsigned int bits,
+				unsigned int max, u8 usealtstatus)
+{
+	u8 status;
+
+	do {
+		udelay(10);
+		if (usealtstatus)
+			status = bfin_check_altstatus(ap);
+		else
+			status = bfin_check_status(ap);
+		max--;
+	} while (status != 0xff && (status & bits) && (max > 0));
+
+	return status;
+}
+
+/**
+ *	bfin_ata_busy_sleep - sleep until BSY clears, or timeout
+ *	@ap: port containing status register to be polled
+ *	@tmout_pat: impatience timeout in msecs
+ *	@tmout: overall timeout in msecs
+ *
+ *	Sleep until ATA Status register bit BSY clears, or a timeout occurs.
+ *	A warning is printed if BSY is still set once @tmout_pat has elapsed.
+ *
+ *	RETURNS: 0 on success, -ENODEV on status 0xff, -EBUSY on timeout.
+ */
+static int bfin_ata_busy_sleep(struct ata_port *ap,
+		       long tmout_pat, unsigned long tmout)
+{
+	/* what remains of @tmout after the @tmout_pat phase is used up */
+	long tmout_left = (long)tmout - tmout_pat;
+	u8 status;
+
+	status = bfin_ata_busy_wait(ap, ATA_BUSY, 300, 0);
+	while (status != 0xff && (status & ATA_BUSY) && tmout_pat > 0) {
+		msleep(50);
+		tmout_pat -= 50;
+		status = bfin_ata_busy_wait(ap, ATA_BUSY, 3, 0);
+	}
+
+	if (status != 0xff && (status & ATA_BUSY))
+		printf("port is slow to respond, please be patient "
+				"(Status 0x%x)\n", status);
+
+	while (status != 0xff && (status & ATA_BUSY) && tmout_left > 0) {
+		msleep(50);
+		tmout_left -= 50;
+		status = bfin_check_status(ap);
+	}
+
+	if (status == 0xff)
+		return -ENODEV;
+
+	if (status & ATA_BUSY) {
+		printf("port failed to respond "
+				"(%lu secs, Status 0x%x)\n",
+				DIV_ROUND_UP(tmout, 1000), status);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/**
+ *	bfin_dev_select - Select device 0/1 on ATA bus
+ *	@ap: ATA channel to manipulate
+ *	@device: ATA device (numbered from zero) to select
+ *
+ *	Note: Original code is ata_sff_dev_select().
+ */
+
+static void bfin_dev_select(struct ata_port *ap, unsigned int device)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	u8 tmp;
+
+	/* any non-zero @device sets the ATA_DEV1 bit */
+	if (device == 0)
+		tmp = ATA_DEVICE_OBS;
+	else
+		tmp = ATA_DEVICE_OBS | ATA_DEV1;
+
+	write_atapi_register(base, ATA_REG_DEVICE, tmp);
+	udelay(1);
+}
+
+/**
+ *	bfin_devchk - PATA device presence detection
+ *	@ap: ATA channel to examine
+ *	@device: Device to examine (starting at zero)
+ *	Returns 1 if the last pattern written is read back, 0 otherwise.
+ *	Note: Original code is ata_devchk().
+ */
+
+static unsigned int bfin_devchk(struct ata_port *ap,
+				unsigned int device)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	u8 nsect, lbal;
+
+	bfin_dev_select(ap, device);
+	/* write alternating patterns; only the final pair is read back */
+	write_atapi_register(base, ATA_REG_NSECT, 0x55);
+	write_atapi_register(base, ATA_REG_LBAL, 0xaa);
+
+	write_atapi_register(base, ATA_REG_NSECT, 0xaa);
+	write_atapi_register(base, ATA_REG_LBAL, 0x55);
+
+	write_atapi_register(base, ATA_REG_NSECT, 0x55);
+	write_atapi_register(base, ATA_REG_LBAL, 0xaa);
+
+	nsect = read_atapi_register(base, ATA_REG_NSECT);
+	lbal = read_atapi_register(base, ATA_REG_LBAL);
+
+	if ((nsect == 0x55) && (lbal == 0xaa))
+		return 1;	/* we found a device */
+
+	return 0;		/* nothing found */
+}
+
+/**
+ *	bfin_bus_post_reset - PATA device post reset
+ *	@devmask: bit 0 / bit 1 set for each device to wait for
+ *	Note: Original code is ata_bus_post_reset().
+ */
+
+static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	unsigned int dev0 = devmask & (1 << 0);
+	unsigned int dev1 = devmask & (1 << 1);
+	long deadline;
+
+	/* if device 0 was found in ata_devchk, wait for its
+	 * BSY bit to clear
+	 */
+	if (dev0)
+		bfin_ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
+
+	/* if device 1 was found in ata_devchk, wait for
+	 * register access, then wait for BSY to clear
+	 */
+	deadline = ATA_TMOUT_BOOT;
+	while (dev1) {
+		u8 nsect, lbal;
+
+		bfin_dev_select(ap, 1);
+		nsect = read_atapi_register(base, ATA_REG_NSECT);
+		lbal = read_atapi_register(base, ATA_REG_LBAL);
+		if ((nsect == 1) && (lbal == 1))
+			break;
+		if (deadline <= 0) {
+			dev1 = 0;
+			break;
+		}
+		msleep(50);	/* give drive a breather */
+		deadline -= 50;
+	}
+	if (dev1)
+		bfin_ata_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT);
+
+	/* is all this really necessary? */
+	bfin_dev_select(ap, 0);
+	if (dev1)
+		bfin_dev_select(ap, 1);
+	if (dev0)
+		bfin_dev_select(ap, 0);
+}
+
+/**
+ *	bfin_bus_softreset - PATA device software reset
+ *	Always returns 0, including when the status register reads 0xFF.
+ *	Note: Original code is ata_bus_softreset().
+ */
+
+static unsigned int bfin_bus_softreset(struct ata_port *ap,
+				       unsigned int devmask)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+
+	/* software reset.  causes dev0 to be selected */
+	write_atapi_register(base, ATA_REG_CTRL, ap->ctl_reg);
+	udelay(20);
+	write_atapi_register(base, ATA_REG_CTRL, ap->ctl_reg | ATA_SRST);
+	udelay(20);
+	write_atapi_register(base, ATA_REG_CTRL, ap->ctl_reg);
+
+	/* spec mandates ">= 2ms" before checking status.
+	 * We wait 150ms, because that was the magic delay used for
+	 * ATAPI devices in Hale Landis's ATADRVR, for the period of time
+	 * between when the ATA command register is written, and then
+	 * status is checked.  Because waiting for "a while" before
+	 * checking status is fine, post SRST, we perform this magic
+	 * delay here as well.
+	 *
+	 * Old drivers/ide uses the 2mS rule and then waits for ready
+	 */
+	msleep(150);
+
+	/* Before we perform post reset processing we want to see if
+	 * the bus shows 0xFF because the odd clown forgets the D7
+	 * pulldown resistor.
+	 */
+	if (bfin_check_status(ap) == 0xFF)
+		return 0;
+
+	bfin_bus_post_reset(ap, devmask);
+
+	return 0;
+}
+
+/**
+ *	bfin_softreset - reset host port via ATA SRST
+ *	@ap: port to reset
+ *	Returns 0, -ENODEV if neither device answers, -EIO if SRST fails.
+ *	Note: Original code is ata_sff_softreset().
+ */
+
+static int bfin_softreset(struct ata_port *ap)
+{
+	unsigned int err_mask;
+
+	ap->dev_mask = 0;
+
+	/* determine if device 0/1 are present.
+	 * only one device is supported on one port by now.
+	*/
+	if (bfin_devchk(ap, 0))
+		ap->dev_mask |= (1 << 0);
+	else if (bfin_devchk(ap, 1))
+		ap->dev_mask |= (1 << 1);
+	else
+		return -ENODEV;
+
+	/* select device 0 again */
+	bfin_dev_select(ap, 0);
+
+	/* issue bus reset */
+	err_mask = bfin_bus_softreset(ap, ap->dev_mask);
+	if (err_mask) {
+		printf("SRST failed (err_mask=0x%x)\n",
+				err_mask);
+		ap->dev_mask = 0;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ *	bfin_irq_clear - Clear ATAPI interrupt.
+ *	@ap: Port associated with this ATA transaction.
+ *	Writes back the current status plus the device and DMA done/term bits.
+ *	Note: Original code is ata_sff_irq_clear().
+ */
+
+static void bfin_irq_clear(struct ata_port *ap)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+
+	ATAPI_SET_INT_STATUS(base, ATAPI_GET_INT_STATUS(base)|ATAPI_DEV_INT
+		| MULTI_DONE_INT | UDMAIN_DONE_INT | UDMAOUT_DONE_INT
+		| MULTI_TERM_INT | UDMAIN_TERM_INT | UDMAOUT_TERM_INT);
+}
+/* Poll INT_STATUS about every 1 ms, at most @max times; non-zero = timed out */
+static u8 bfin_wait_for_irq(struct ata_port *ap, unsigned int max)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+
+	do {
+		if (ATAPI_GET_INT_STATUS(base) & (ATAPI_DEV_INT
+		| MULTI_DONE_INT | UDMAIN_DONE_INT | UDMAOUT_DONE_INT
+		| MULTI_TERM_INT | UDMAIN_TERM_INT | UDMAOUT_TERM_INT)) {
+			break;
+		}
+		udelay(1000);
+		max--;
+	} while ((max > 0));
+
+	return max == 0;
+}
+
+/**
+ *	bfin_ata_reset_port - initialize BFIN ATAPI port.
+ *	Returns 0 once BSY is seen clear, non-zero if the poll count ran out.
+ */
+static int bfin_ata_reset_port(struct ata_port *ap)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	int count;
+	unsigned short status;
+
+	/* Disable all ATAPI interrupts */
+	ATAPI_SET_INT_MASK(base, 0);
+	SSYNC();
+
+	/* Assert the RESET signal for at least 25us (30us is used) */
+	ATAPI_SET_CONTROL(base, ATAPI_GET_CONTROL(base) | DEV_RST);
+	udelay(30);
+
+	/* Negate the RESET signal for 2ms*/
+	ATAPI_SET_CONTROL(base, ATAPI_GET_CONTROL(base) & ~DEV_RST);
+	msleep(2);
+
+	/* Wait on Busy flag to clear */
+	count = 10000000;
+	do {
+		status = read_atapi_register(base, ATA_REG_STATUS);
+	} while (--count && (status & ATA_BUSY));
+
+	/* Enable only ATAPI Device interrupt */
+	ATAPI_SET_INT_MASK(base, 1);
+	SSYNC();
+
+	return !count;
+}
+
+/**
+ *	bfin_config_atapi_gpio - configure the ATAPI pins for use
+ *	@ap: port being set up (not used here)
+ *
+ *	Programs the PORTH/PORTJ function-enable, mux and direction
+ *	registers and the PINT2 assignment.  Always returns 0.
+ */
+static int bfin_config_atapi_gpio(struct ata_port *ap)
+{
+	bfin_write_PORTH_FER(bfin_read_PORTH_FER() | 0x4);
+	bfin_write_PORTH_MUX(bfin_read_PORTH_MUX() & ~0x30);
+	bfin_write_PORTH_DIR_SET(0x4);
+	/* read-modify-write PORTJ_MUX itself; PORTI_MUX was read before */
+	bfin_write_PORTJ_FER(0x7f8);
+	bfin_write_PORTJ_MUX(bfin_read_PORTJ_MUX() & ~0x3fffc0);
+	bfin_write_PORTJ_DIR_SET(0x5f8);
+	bfin_write_PORTJ_DIR_CLEAR(0x200);
+	bfin_write_PORTJ_INEN(0x200);
+
+	bfin_write_PINT2_ASSIGN(0x0707);
+	bfin_write_PINT2_MASK_SET(0x200);
+	SSYNC();
+
+	return 0;
+}
+
+/**
+ *	bfin_ata_probe_port - bring up one ATAPI port
+ *	@ap: port to initialise
+ *
+ *	Configures the ATAPI pins, hard-resets the port and programs the
+ *	PIO timings for @ap->ata_mode.
+ *
+ *	Only XFER_PIO_0 .. XFER_PIO_4 are accepted as transfer modes.
+ *
+ *	RETURNS:
+ *	0 on success, -EFAULT if pin setup, the reset or the mode check
+ *	fails.
+ */
+static int bfin_ata_probe_port(struct ata_port *ap)
+{
+	if (bfin_config_atapi_gpio(ap)) {
+		printf("Requesting Peripherals faild\n");
+		return -EFAULT;
+	}
+
+	if (bfin_ata_reset_port(ap)) {
+		printf("Fail to reset ATAPI device\n");
+		return -EFAULT;
+	}
+
+	if (ap->ata_mode >= XFER_PIO_0 && ap->ata_mode <= XFER_PIO_4)
+		bfin_set_piomode(ap, ap->ata_mode);
+	else {
+		printf("Given ATA data transfer mode is not supported.\n");
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+#define ATA_SECTOR_WORDS (ATA_SECT_SIZE/2)
+/* Issue IDENTIFY to @dev on @ap and fill in sata_dev_desc[ap->port_no]. */
+static void bfin_ata_identify(struct ata_port *ap, int dev)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	u8 status = 0;
+	static u16 iobuf[ATA_SECTOR_WORDS];
+	u64 n_sectors = 0;
+	hd_driveid_t *iop = (hd_driveid_t *)iobuf;
+
+	memset(iobuf, 0, sizeof(iobuf));
+
+	if (!(ap->dev_mask & (1 << dev)))
+		return;
+
+	debug("port=%d dev=%d\n", ap->port_no, dev);
+
+	bfin_dev_select(ap, dev);
+
+	status = 0;
+	/* Device Identify Command */
+	write_atapi_register(base, ATA_REG_CMD, ATA_CMD_ID_ATA);
+	bfin_check_altstatus(ap);
+	udelay(10);
+	/* a device that is still BSY has no valid data to hand over either */
+	status = bfin_ata_busy_wait(ap, ATA_BUSY, 1000, 0);
+	if (status & (ATA_BUSY | ATA_ERR)) {
+		printf("\ndevice not responding\n");
+		ap->dev_mask &= ~(1 << dev);
+		return;
+	}
+
+	read_atapi_data(base, ATA_SECTOR_WORDS, iobuf);
+
+	ata_swap_buf_le16(iobuf, ATA_SECTOR_WORDS);
+
+	/* warn if LBA or DMA support (bits 8 & 9 of word 49) is missing */
+	if (!ata_id_has_dma(iobuf) || !ata_id_has_lba(iobuf))
+		printf("ata%u: no dma/lba\n", ap->port_no);
+
+#ifdef DEBUG
+	ata_dump_id(iobuf);
+#endif
+
+	n_sectors = ata_id_n_sectors(iobuf);
+
+	if (n_sectors == 0) {
+		ap->dev_mask &= ~(1 << dev);
+		return;
+	}
+
+	ata_id_c_string(iobuf, (unsigned char *)sata_dev_desc[ap->port_no].revision,
+			 ATA_ID_FW_REV, sizeof(sata_dev_desc[ap->port_no].revision));
+	ata_id_c_string(iobuf, (unsigned char *)sata_dev_desc[ap->port_no].vendor,
+			 ATA_ID_PROD, sizeof(sata_dev_desc[ap->port_no].vendor));
+	ata_id_c_string(iobuf, (unsigned char *)sata_dev_desc[ap->port_no].product,
+			 ATA_ID_SERNO, sizeof(sata_dev_desc[ap->port_no].product));
+
+	if ((iop->config & 0x0080) == 0x0080)
+		sata_dev_desc[ap->port_no].removable = 1;
+	else
+		sata_dev_desc[ap->port_no].removable = 0;
+
+	sata_dev_desc[ap->port_no].lba = (u32) n_sectors;
+	debug("lba=0x%x\n", sata_dev_desc[ap->port_no].lba);
+
+#ifdef CONFIG_LBA48
+	if (iop->command_set_2 & 0x0400)
+		sata_dev_desc[ap->port_no].lba48 = 1;
+	else
+		sata_dev_desc[ap->port_no].lba48 = 0;
+#endif
+
+	/* assuming HD */
+	sata_dev_desc[ap->port_no].type = DEV_TYPE_HARDDISK;
+	sata_dev_desc[ap->port_no].blksz = ATA_SECT_SIZE;
+	sata_dev_desc[ap->port_no].lun = 0;	/* just to fill something in... */
+
+	printf("PATA device#%d %s is found on ata port#%d.\n",
+		ap->port_no%PATA_DEV_NUM_PER_PORT,
+		sata_dev_desc[ap->port_no].vendor,
+		ap->port_no/PATA_DEV_NUM_PER_PORT);
+}
+/* Send SET FEATURES (transfer mode = ap->ata_mode) to @dev; drop it on error. */
+static void bfin_ata_set_Feature_cmd(struct ata_port *ap, int dev)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	u8 status = 0;
+
+	if (!(ap->dev_mask & (1 << dev)))
+		return;
+
+	bfin_dev_select(ap, dev);
+
+	write_atapi_register(base, ATA_REG_FEATURE, SETFEATURES_XFER);
+	write_atapi_register(base, ATA_REG_NSECT, ap->ata_mode);
+	write_atapi_register(base, ATA_REG_LBAL, 0);
+	write_atapi_register(base, ATA_REG_LBAM, 0);
+	write_atapi_register(base, ATA_REG_LBAH, 0);
+	/* keep the DEV1 bit: a bare ATA_DEVICE_OBS would re-select device 0 */
+	bfin_dev_select(ap, dev);
+	write_atapi_register(base, ATA_REG_CMD, ATA_CMD_SET_FEATURES);
+
+	udelay(50);
+	msleep(150);
+
+	status = bfin_ata_busy_wait(ap, ATA_BUSY, 5000, 0);
+	if ((status & (ATA_BUSY | ATA_ERR))) {
+		printf("Error  : status 0x%02x\n", status);
+		ap->dev_mask &= ~(1 << dev);
+	}
+}
+
+/*
+ * Scan the PATA port that device index dev belongs to.
+ *
+ * dev is the index of each ata device in the system.  One PATA port carries
+ * PATA_DEV_NUM_PER_PORT devices and owns one entry of the scan_done array;
+ * the device on a port is selected by bfin_dev_select() before access.
+ *
+ * Returns 0 when the port's scan_done entry was already set, or when it gets
+ * set by this call (the device is then identified, its transfer mode is
+ * programmed and init_part() is run).  Returns -1 after printing a
+ * "not present" message otherwise.
+ */
+int scan_sata(int dev)
+{
+	static int scan_done[(CONFIG_SYS_SATA_MAX_DEVICE+1)/PATA_DEV_NUM_PER_PORT];
+	struct ata_port *ap = &port[dev];
+	int *done = &scan_done[dev / PATA_DEV_NUM_PER_PORT];
+	int unit = dev % PATA_DEV_NUM_PER_PORT;
+
+	if (*done)
+		return 0;
+
+	/* Check for attached device */
+	if (!bfin_ata_probe_port(ap)) {
+		if (!bfin_softreset(ap)) {
+			*done = 1;
+		} else {
+			/* first soft reset returned non-zero: hard reset, then retry */
+			bfin_ata_reset_port(ap);
+			if (bfin_softreset(ap))
+				*done = 1;
+		}
+	}
+
+	if (!*done) {
+		printf("PATA device#%d is not present on ATA port#%d.\n",
+			ap->port_no%PATA_DEV_NUM_PER_PORT,
+			ap->port_no/PATA_DEV_NUM_PER_PORT);
+		return -1;
+	}
+
+	/* Probe device and set xfer mode */
+	bfin_ata_identify(ap, unit);
+	bfin_ata_set_Feature_cmd(ap, unit);
+	init_part(&sata_dev_desc[dev]);
+
+	return 0;
+}
+
+/*
+ * One-time setup of the ata_port entry for device index dev.
+ *
+ * Only the first call does any work; it records the controller base
+ * address and the configured transfer mode (forced to XFER_PIO_4 when the
+ * configured mode lies outside XFER_PIO_0..XFER_PIO_4), the device index and
+ * the default control register value.
+ *
+ * Returns 0 when the port was set up, 1 on every later call or when dev
+ * does not belong to port 0.
+ */
+int init_sata(int dev)
+{
+	static u8 init_done;
+	struct ata_port *ap = &port[dev];
+
+	if (init_done)
+		return 1;
+	init_done = 1;
+
+	/* only port 0 exists */
+	if (dev / PATA_DEV_NUM_PER_PORT != 0) {
+		printf("Tried to scan unknown port %d.\n", dev);
+		return 1;
+	}
+
+	ap->ioaddr.ctl_addr = ATAPI_CONTROL;
+	ap->ata_mode = CONFIG_BFIN_ATA_MODE;
+
+	if (ap->ata_mode < XFER_PIO_0 || ap->ata_mode > XFER_PIO_4) {
+		ap->ata_mode = XFER_PIO_4;
+		printf("DMA mode is not supported. Set to PIO mode 4.\n");
+	}
+
+	ap->port_no = dev;
+	ap->ctl_reg = 0x8;	/*Default value of control reg */
+
+	return 0;
+}
+
+/*
+ * Read up to 255 sectors with a single PIO read command.
+ *
+ * ap:     port to read from (sata_read() selects the device beforehand)
+ * blknr:  first LBA of the transfer
+ * blkcnt: number of sectors requested
+ * buffer: destination, ATA_SECTOR_WORDS 16-bit words per sector
+ * lba48:  non-zero to issue the 48-bit command (honoured only when
+ *         CONFIG_LBA48 is defined)
+ *
+ * Returns the number of sectors read, which is less than blkcnt when the
+ * device is not ready, reports an error or no interrupt arrives.
+ */
+static u8 do_one_read(struct ata_port *ap, u64 blknr, u8 blkcnt, u16 *buffer,
+			uchar lba48)
+{
+	void __iomem *regs = (void __iomem *)ap->ioaddr.ctl_addr;
+	u8 nread = 0;
+	u8 sts;
+	u16 errreg = 0;
+
+	if ((bfin_check_status(ap) & ATA_DRDY) == 0) {
+		printf("Device ata%d not ready\n", ap->port_no);
+		return 0;
+	}
+
+	/* Set up transfer */
+#ifdef CONFIG_LBA48
+	if (lba48) {
+		/* the high-order bytes go into the registers first */
+		write_atapi_register(regs, ATA_REG_NSECT, 0);
+		write_atapi_register(regs, ATA_REG_LBAL, (blknr >> 24) & 0xFF);
+		write_atapi_register(regs, ATA_REG_LBAM, (blknr >> 32) & 0xFF);
+		write_atapi_register(regs, ATA_REG_LBAH, (blknr >> 40) & 0xFF);
+	}
+#endif
+	write_atapi_register(regs, ATA_REG_NSECT, blkcnt);
+	write_atapi_register(regs, ATA_REG_LBAL, (blknr >> 0) & 0xFF);
+	write_atapi_register(regs, ATA_REG_LBAM, (blknr >> 8) & 0xFF);
+	write_atapi_register(regs, ATA_REG_LBAH, (blknr >> 16) & 0xFF);
+
+#ifdef CONFIG_LBA48
+	if (lba48) {
+		write_atapi_register(regs, ATA_REG_DEVICE, ATA_LBA);
+		write_atapi_register(regs, ATA_REG_CMD, ATA_CMD_PIO_READ_EXT);
+	} else
+#endif
+	{
+		/* 28-bit addressing: LBA bits 27..24 ride in the device register */
+		write_atapi_register(regs, ATA_REG_DEVICE, ATA_LBA | ((blknr >> 24) & 0xF));
+		write_atapi_register(regs, ATA_REG_CMD, ATA_CMD_PIO_READ);
+	}
+
+	sts = bfin_ata_busy_wait(ap, ATA_BUSY, 500000, 1);
+	if (sts & (ATA_BUSY | ATA_ERR)) {
+		printf("Device %d not responding status 0x%x.\n", ap->port_no, sts);
+		errreg = read_atapi_register(regs, ATA_REG_ERR);
+		printf("Error reg = 0x%x\n", errreg);
+		return nread;
+	}
+
+	for (; blkcnt != 0; blkcnt--) {
+		if (bfin_wait_for_irq(ap, 500)) {
+			printf("ata%u irq failed\n", ap->port_no);
+			return nread;
+		}
+
+		sts = bfin_check_status(ap);
+		if (sts & ATA_ERR) {
+			errreg = read_atapi_register(regs, ATA_REG_ERR);
+			printf("ata%u error %d\n", ap->port_no, errreg);
+			return nread;
+		}
+		bfin_irq_clear(ap);
+
+		/* Read one sector */
+		read_atapi_data(regs, ATA_SECTOR_WORDS, buffer);
+		buffer += ATA_SECTOR_WORDS;
+		nread++;
+	}
+
+	return nread;
+}
+
+/*
+ * Read blkcnt sectors starting at LBA block from device dev into buff.
+ *
+ * The request is split into commands of at most 255 sectors, each handled
+ * by do_one_read().  With CONFIG_LBA48, 48-bit commands are used whenever
+ * any sector of the request lies above the 28-bit limit; if the drive does
+ * not report 48-bit support nothing is read.
+ *
+ * Returns the number of sectors read, which is short of blkcnt when a
+ * command fails.
+ */
+ulong sata_read(int dev, ulong block, ulong blkcnt, void *buff)
+{
+	struct ata_port *ap = &port[dev];
+	ulong n = 0, sread;
+	u16 *buffer = (u16 *) buff;
+	u8 status = 0;
+	u64 blknr = (u64) block;
+	unsigned char lba48 = 0;
+#ifdef CONFIG_LBA48
+	/*
+	 * Judge the request by its last sector, not only its first: a
+	 * transfer that starts below 0xfffffff but runs across it would
+	 * otherwise be issued with 28-bit commands, which drop the upper
+	 * address bits and read the wrong sectors.
+	 */
+	u64 last = blkcnt ? blknr + blkcnt - 1 : blknr;
+
+	if (last > 0xfffffff) {
+		if (!sata_dev_desc[dev].lba48) {
+			printf("Drive doesn't support 48-bit addressing\n");
+			return 0;
+		}
+		/* more than 28 bits used, use 48bit mode */
+		lba48 = 1;
+	}
+#endif
+	bfin_dev_select(ap, dev%PATA_DEV_NUM_PER_PORT);
+
+	while (blkcnt > 0) {
+
+		/* one command moves at most 255 sectors */
+		if (blkcnt > 255)
+			sread = 255;
+		else
+			sread = blkcnt;
+
+		status = do_one_read(ap, blknr, sread, buffer, lba48);
+		if (status != sread) {
+			printf("Read failed\n");
+			return n;
+		}
+
+		blkcnt -= sread;
+		blknr += sread;
+		n += sread;
+		buffer += sread * ATA_SECTOR_WORDS;
+	}
+	return n;
+}
+
+/*
+ * Write blkcnt sectors from buff to device dev, starting at LBA block.
+ *
+ * Sectors are written one per command.  With CONFIG_LBA48, 48-bit commands
+ * are used whenever any sector of the request lies above the 28-bit limit;
+ * if the drive does not report 48-bit support nothing is written.
+ *
+ * Returns the number of sectors written, which is short of blkcnt when the
+ * device stays busy or does not raise DRQ.
+ */
+ulong sata_write(int dev, ulong block, ulong blkcnt, const void *buff)
+{
+	struct ata_port *ap = &port[dev];
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	ulong n = 0;
+	u16 *buffer = (u16 *) buff;
+	unsigned char status = 0;
+	u64 blknr = (u64) block;
+#ifdef CONFIG_LBA48
+	unsigned char lba48 = 0;
+	/*
+	 * Judge the request by its last sector, not only its first: a
+	 * transfer that starts below 0xfffffff but runs across it would
+	 * otherwise be issued with 28-bit commands, which drop the upper
+	 * address bits and overwrite the wrong sectors.
+	 */
+	u64 last = blkcnt ? blknr + blkcnt - 1 : blknr;
+
+	if (last > 0xfffffff) {
+		if (!sata_dev_desc[dev].lba48) {
+			printf("Drive doesn't support 48-bit addressing\n");
+			return 0;
+		}
+		/* more than 28 bits used, use 48bit mode */
+		lba48 = 1;
+	}
+#endif
+
+	bfin_dev_select(ap, dev%PATA_DEV_NUM_PER_PORT);
+
+	while (blkcnt-- > 0) {
+		status = bfin_ata_busy_wait(ap, ATA_BUSY, 50000, 0);
+		if (status & ATA_BUSY) {
+			printf("ata%u failed to respond\n", ap->port_no);
+			return n;
+		}
+#ifdef CONFIG_LBA48
+		if (lba48) {
+			/* write high bits */
+			write_atapi_register(base, ATA_REG_NSECT, 0);
+			write_atapi_register(base, ATA_REG_LBAL,
+				(blknr >> 24) & 0xFF);
+			write_atapi_register(base, ATA_REG_LBAM,
+				(blknr >> 32) & 0xFF);
+			write_atapi_register(base, ATA_REG_LBAH,
+				(blknr >> 40) & 0xFF);
+		}
+#endif
+		write_atapi_register(base, ATA_REG_NSECT, 1);
+		write_atapi_register(base, ATA_REG_LBAL, (blknr >> 0) & 0xFF);
+		write_atapi_register(base, ATA_REG_LBAM, (blknr >> 8) & 0xFF);
+		write_atapi_register(base, ATA_REG_LBAH, (blknr >> 16) & 0xFF);
+#ifdef CONFIG_LBA48
+		if (lba48) {
+			write_atapi_register(base, ATA_REG_DEVICE, ATA_LBA);
+			write_atapi_register(base, ATA_REG_CMD,
+				ATA_CMD_PIO_WRITE_EXT);
+		} else
+#endif
+		{
+			/* 28-bit addressing: LBA bits 27..24 ride in the device register */
+			write_atapi_register(base, ATA_REG_DEVICE,
+				ATA_LBA | ((blknr >> 24) & 0xF));
+			write_atapi_register(base, ATA_REG_CMD,
+				ATA_CMD_PIO_WRITE);
+		}
+
+		/*may take up to 5 sec */
+		status = bfin_ata_busy_wait(ap, ATA_BUSY, 50000, 0);
+		/* only "DRQ set, BUSY and ERR clear" lets the data phase start */
+		if ((status & (ATA_DRQ | ATA_BUSY | ATA_ERR)) != ATA_DRQ) {
+			/* the block number is unsigned, so print it with %lu */
+			printf("Error no DRQ dev %d blk %lu: sts 0x%02x\n",
+				ap->port_no, (ulong) blknr, status);
+			return n;
+		}
+
+		write_atapi_data(base, ATA_SECTOR_WORDS, buffer);
+		bfin_check_altstatus(ap);
+		udelay(1);
+
+		++n;
+		++blknr;
+		buffer += ATA_SECTOR_WORDS;
+	}
+	return n;
+}
diff --git a/drivers/block/pata_bfin.h b/drivers/block/pata_bfin.h
new file mode 100644
index 0000000..2b3425b
--- /dev/null
+++ b/drivers/block/pata_bfin.h
@@ -0,0 +1,173 @@
+/*
+ * Driver for Blackfin on-chip ATAPI controller.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (c) 2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef PATA_BFIN_H
+#define PATA_BFIN_H
+
+#include <asm/blackfin_local.h>
+
+struct ata_ioports {	/* addresses of one port's register blocks; embedded in struct ata_port */
+	unsigned long cmd_addr;
+	unsigned long data_addr;
+	unsigned long error_addr;
+	unsigned long feature_addr;
+	unsigned long nsect_addr;
+	unsigned long lbal_addr;
+	unsigned long lbam_addr;
+	unsigned long lbah_addr;
+	unsigned long device_addr;
+	unsigned long status_addr;
+	unsigned long command_addr;
+	unsigned long altstatus_addr;
+	unsigned long ctl_addr;	/* init_sata() stores ATAPI_CONTROL here; the driver casts it to the base pointer for register access */
+	unsigned long bmdma_addr;
+	unsigned long scr_addr;
+};
+
+struct ata_port {
+	unsigned int port_no;		/* device index stored by init_sata(); device# = port_no % PATA_DEV_NUM_PER_PORT, port# = port_no / PATA_DEV_NUM_PER_PORT */
+	struct ata_ioports ioaddr;	/* ATA cmd/ctl/dma reg blks     */
+	unsigned long flag;
+	unsigned int ata_mode;		/* transfer mode sent with SET FEATURES; init_sata() forces XFER_PIO_4 when outside XFER_PIO_0..XFER_PIO_4 */
+	unsigned char ctl_reg;		/* init_sata() sets this to 0x8 */
+	unsigned char last_ctl;
+	unsigned char dev_mask;		/* one bit (1 << dev) per device; cleared when identify or SET FEATURES fails */
+};
+
+extern block_dev_desc_t sata_dev_desc[CONFIG_SYS_SATA_MAX_DEVICE];	/* one descriptor per device index, filled in by the driver's identify step */
+
+#define DRV_NAME		"pata-bfin"
+#define DRV_VERSION		"0.9"
+#define __iomem	/* expands to nothing; lets the driver write "void __iomem *" */
+
+#define ATA_REG_CTRL		0x0E
+#define ATA_REG_ALTSTATUS	ATA_REG_CTRL	/* same register number as ATA_REG_CTRL */
+#define ATA_TMOUT_BOOT		30000
+#define ATA_TMOUT_BOOT_QUICK	7000
+
+#define PATA_BFIN_WAIT_TIMEOUT		10000
+#define PATA_DEV_NUM_PER_PORT	2	/* devices per PATA port; device index / this = port, index % this = device on the port */
+
+/* Offsets of the controller's registers; the ATAPI_GET_xxx()/ATAPI_SET_xxx() accessors below add them to the controller base */
+#define ATAPI_OFFSET_CONTROL		0x00
+#define ATAPI_OFFSET_STATUS		0x04
+#define ATAPI_OFFSET_DEV_ADDR		0x08
+#define ATAPI_OFFSET_DEV_TXBUF		0x0c
+#define ATAPI_OFFSET_DEV_RXBUF		0x10
+#define ATAPI_OFFSET_INT_MASK		0x14
+#define ATAPI_OFFSET_INT_STATUS		0x18
+#define ATAPI_OFFSET_XFER_LEN		0x1c
+#define ATAPI_OFFSET_LINE_STATUS	0x20
+#define ATAPI_OFFSET_SM_STATE		0x24
+#define ATAPI_OFFSET_TERMINATE		0x28
+#define ATAPI_OFFSET_PIO_TFRCNT		0x2c
+#define ATAPI_OFFSET_DMA_TFRCNT		0x30
+#define ATAPI_OFFSET_UMAIN_TFRCNT	0x34
+#define ATAPI_OFFSET_UDMAOUT_TFRCNT	0x38
+#define ATAPI_OFFSET_REG_TIM_0		0x40
+#define ATAPI_OFFSET_PIO_TIM_0		0x44
+#define ATAPI_OFFSET_PIO_TIM_1		0x48
+#define ATAPI_OFFSET_MULTI_TIM_0	0x50
+#define ATAPI_OFFSET_MULTI_TIM_1	0x54
+#define ATAPI_OFFSET_MULTI_TIM_2	0x58
+#define ATAPI_OFFSET_ULTRA_TIM_0	0x60
+#define ATAPI_OFFSET_ULTRA_TIM_1	0x64
+#define ATAPI_OFFSET_ULTRA_TIM_2	0x68
+#define ATAPI_OFFSET_ULTRA_TIM_3	0x6c
+
+
+/*
+ * 16-bit accessors for the ATAPI controller registers.
+ *
+ * base is the controller base address; each macro adds the matching
+ * ATAPI_OFFSET_xxx and reads or writes the register with bfin_read16() /
+ * bfin_write16().  base is parenthesized so that an expression argument
+ * (a cast, a conditional, ...) is evaluated as a whole before the offset
+ * is added.
+ */
+#define ATAPI_GET_CONTROL(base)\
+	bfin_read16((base) + ATAPI_OFFSET_CONTROL)
+#define ATAPI_SET_CONTROL(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_CONTROL, val)
+#define ATAPI_GET_STATUS(base)\
+	bfin_read16((base) + ATAPI_OFFSET_STATUS)
+#define ATAPI_GET_DEV_ADDR(base)\
+	bfin_read16((base) + ATAPI_OFFSET_DEV_ADDR)
+#define ATAPI_SET_DEV_ADDR(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_DEV_ADDR, val)
+#define ATAPI_GET_DEV_TXBUF(base)\
+	bfin_read16((base) + ATAPI_OFFSET_DEV_TXBUF)
+#define ATAPI_SET_DEV_TXBUF(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_DEV_TXBUF, val)
+#define ATAPI_GET_DEV_RXBUF(base)\
+	bfin_read16((base) + ATAPI_OFFSET_DEV_RXBUF)
+#define ATAPI_SET_DEV_RXBUF(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_DEV_RXBUF, val)
+#define ATAPI_GET_INT_MASK(base)\
+	bfin_read16((base) + ATAPI_OFFSET_INT_MASK)
+#define ATAPI_SET_INT_MASK(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_INT_MASK, val)
+#define ATAPI_GET_INT_STATUS(base)\
+	bfin_read16((base) + ATAPI_OFFSET_INT_STATUS)
+#define ATAPI_SET_INT_STATUS(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_INT_STATUS, val)
+#define ATAPI_GET_XFER_LEN(base)\
+	bfin_read16((base) + ATAPI_OFFSET_XFER_LEN)
+#define ATAPI_SET_XFER_LEN(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_XFER_LEN, val)
+#define ATAPI_GET_LINE_STATUS(base)\
+	bfin_read16((base) + ATAPI_OFFSET_LINE_STATUS)
+#define ATAPI_GET_SM_STATE(base)\
+	bfin_read16((base) + ATAPI_OFFSET_SM_STATE)
+#define ATAPI_GET_TERMINATE(base)\
+	bfin_read16((base) + ATAPI_OFFSET_TERMINATE)
+#define ATAPI_SET_TERMINATE(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_TERMINATE, val)
+#define ATAPI_GET_PIO_TFRCNT(base)\
+	bfin_read16((base) + ATAPI_OFFSET_PIO_TFRCNT)
+#define ATAPI_GET_DMA_TFRCNT(base)\
+	bfin_read16((base) + ATAPI_OFFSET_DMA_TFRCNT)
+#define ATAPI_GET_UMAIN_TFRCNT(base)\
+	bfin_read16((base) + ATAPI_OFFSET_UMAIN_TFRCNT)
+#define ATAPI_GET_UDMAOUT_TFRCNT(base)\
+	bfin_read16((base) + ATAPI_OFFSET_UDMAOUT_TFRCNT)
+#define ATAPI_GET_REG_TIM_0(base)\
+	bfin_read16((base) + ATAPI_OFFSET_REG_TIM_0)
+#define ATAPI_SET_REG_TIM_0(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_REG_TIM_0, val)
+#define ATAPI_GET_PIO_TIM_0(base)\
+	bfin_read16((base) + ATAPI_OFFSET_PIO_TIM_0)
+#define ATAPI_SET_PIO_TIM_0(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_PIO_TIM_0, val)
+#define ATAPI_GET_PIO_TIM_1(base)\
+	bfin_read16((base) + ATAPI_OFFSET_PIO_TIM_1)
+#define ATAPI_SET_PIO_TIM_1(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_PIO_TIM_1, val)
+#define ATAPI_GET_MULTI_TIM_0(base)\
+	bfin_read16((base) + ATAPI_OFFSET_MULTI_TIM_0)
+#define ATAPI_SET_MULTI_TIM_0(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_MULTI_TIM_0, val)
+#define ATAPI_GET_MULTI_TIM_1(base)\
+	bfin_read16((base) + ATAPI_OFFSET_MULTI_TIM_1)
+#define ATAPI_SET_MULTI_TIM_1(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_MULTI_TIM_1, val)
+#define ATAPI_GET_MULTI_TIM_2(base)\
+	bfin_read16((base) + ATAPI_OFFSET_MULTI_TIM_2)
+#define ATAPI_SET_MULTI_TIM_2(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_MULTI_TIM_2, val)
+#define ATAPI_GET_ULTRA_TIM_0(base)\
+	bfin_read16((base) + ATAPI_OFFSET_ULTRA_TIM_0)
+#define ATAPI_SET_ULTRA_TIM_0(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_ULTRA_TIM_0, val)
+#define ATAPI_GET_ULTRA_TIM_1(base)\
+	bfin_read16((base) + ATAPI_OFFSET_ULTRA_TIM_1)
+#define ATAPI_SET_ULTRA_TIM_1(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_ULTRA_TIM_1, val)
+#define ATAPI_GET_ULTRA_TIM_2(base)\
+	bfin_read16((base) + ATAPI_OFFSET_ULTRA_TIM_2)
+#define ATAPI_SET_ULTRA_TIM_2(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_ULTRA_TIM_2, val)
+#define ATAPI_GET_ULTRA_TIM_3(base)\
+	bfin_read16((base) + ATAPI_OFFSET_ULTRA_TIM_3)
+#define ATAPI_SET_ULTRA_TIM_3(base, val)\
+	bfin_write16((base) + ATAPI_OFFSET_ULTRA_TIM_3, val)
+
+#endif
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index bb0d52e..d496364 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -26,6 +26,7 @@
 LIB	:= $(obj)libmmc.a
 
 COBJS-$(CONFIG_ATMEL_MCI) += atmel_mci.o
+COBJS-$(CONFIG_BFIN_SDH) += bfin_sdh.o
 COBJS-$(CONFIG_OMAP3_MMC) += omap3_mmc.o
 
 COBJS	:= $(COBJS-y)
diff --git a/drivers/mmc/bfin_sdh.c b/drivers/mmc/bfin_sdh.c
new file mode 100644
index 0000000..7d6b495
--- /dev/null
+++ b/drivers/mmc/bfin_sdh.c
@@ -0,0 +1,546 @@
+/*
+ * Driver for Blackfin on-chip SDH controller
+ *
+ * Copyright (c) 2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <common.h>
+#include <malloc.h>
+#include <part.h>
+#include <mmc.h>
+
+#include <asm/io.h>
+#include <asm/errno.h>
+#include <asm/byteorder.h>
+#include <asm/blackfin.h>
+#include <asm/mach-common/bits/sdh.h>
+#include <asm/mach-common/bits/dma.h>
+
+#include "bfin_sdh.h"
+
+/* SD_CLK requested at the start of mmc_init(), i.e. during card identification; must be less than 400k in that mode */
+#ifndef CONFIG_SYS_MMC_CLK_ID
+#define CONFIG_SYS_MMC_CLK_ID		200000
+#endif
+/* SD_CLK for normal working; requested at the end of mmc_init() once the card is set up */
+#ifndef CONFIG_SYS_MMC_CLK_OP
+#define CONFIG_SYS_MMC_CLK_OP		25000000
+#endif
+/* argument of the OP_COND commands: support 3.2-3.3V and 3.3-3.4V */
+#define CONFIG_SYS_MMC_OP_COND		0x00300000
+#define MMC_DEFAULT_RCA		1	/* RCA that mmc_init() assigns before falling back to MMC initialization */
+
+#if defined(__ADSPBF51x__)	/* BF51x: alias the SDH_xxx accessor names used by this driver to the RSI_xxx ones, and the DMA accessors to channel 4 */
+# define bfin_read_SDH_PWR_CTL		bfin_read_RSI_PWR_CONTROL
+# define bfin_write_SDH_PWR_CTL		bfin_write_RSI_PWR_CONTROL
+# define bfin_read_SDH_CLK_CTL		bfin_read_RSI_CLK_CONTROL
+# define bfin_write_SDH_CLK_CTL		bfin_write_RSI_CLK_CONTROL
+# define bfin_write_SDH_ARGUMENT	bfin_write_RSI_ARGUMENT
+# define bfin_write_SDH_COMMAND		bfin_write_RSI_COMMAND
+# define bfin_read_SDH_RESPONSE0	bfin_read_RSI_RESPONSE0
+# define bfin_read_SDH_RESPONSE1	bfin_read_RSI_RESPONSE1
+# define bfin_read_SDH_RESPONSE2	bfin_read_RSI_RESPONSE2
+# define bfin_read_SDH_RESPONSE3	bfin_read_RSI_RESPONSE3
+# define bfin_write_SDH_DATA_TIMER	bfin_write_RSI_DATA_TIMER
+# define bfin_write_SDH_DATA_LGTH	bfin_write_RSI_DATA_LGTH
+# define bfin_read_SDH_DATA_CTL		bfin_read_RSI_DATA_CONTROL
+# define bfin_write_SDH_DATA_CTL	bfin_write_RSI_DATA_CONTROL
+# define bfin_read_SDH_STATUS		bfin_read_RSI_STATUS
+# define bfin_write_SDH_STATUS_CLR 	bfin_write_RSI_STATUSCL
+# define bfin_read_SDH_CFG		bfin_read_RSI_CONFIG
+# define bfin_write_SDH_CFG		bfin_write_RSI_CONFIG
+# define bfin_write_DMA_START_ADDR	bfin_write_DMA4_START_ADDR
+# define bfin_write_DMA_X_COUNT		bfin_write_DMA4_X_COUNT
+# define bfin_write_DMA_X_MODIFY	bfin_write_DMA4_X_MODIFY
+# define bfin_write_DMA_CONFIG		bfin_write_DMA4_CONFIG
+#elif defined(__ADSPBF54x__)	/* BF54x: only the DMA accessors are aliased here, to channel 22 */
+# define bfin_write_DMA_START_ADDR	bfin_write_DMA22_START_ADDR
+# define bfin_write_DMA_X_COUNT		bfin_write_DMA22_X_COUNT
+# define bfin_write_DMA_X_MODIFY	bfin_write_DMA22_X_MODIFY
+# define bfin_write_DMA_CONFIG		bfin_write_DMA22_CONFIG
+#else
+# error no support for this proc yet
+#endif
+
+static unsigned int mmc_rca;	/* relative card address; sent as (mmc_rca << 16) in addressed commands */
+static int mmc_card_is_sd;	/* cleared at the start of mmc_init(), set to 1 by sd_init_card() */
+static block_dev_desc_t mmc_blkdev;	/* the single block device handed out by mmc_get_dev() */
+struct mmc_cid cid;	/* CID parsed by sd_init_card()/mmc_init_card() during mmc_init() */
+static __u32 csd[4];	/* raw response to MMC_CMD_SEND_CSD, stored by mmc_init() */
+
+/*
+ * Extract the size-bit field whose lowest bit is number start from a
+ * 128-bit value held in four 32-bit words, most significant word first
+ * (resp[0] carries bits 127..96, resp[3] bits 31..0).  A field may straddle
+ * two words; size may be 1..32.
+ *
+ * The mask is built from an unsigned 1 so that a 31-bit field does not
+ * shift into the sign bit of an int, and every macro argument is
+ * parenthesized so expression arguments expand as a unit.
+ */
+#define get_bits(resp, start, size)					\
+	({								\
+		const int __size = (size);				\
+		const uint32_t __mask = (__size < 32 ? 1U << __size : 0U) - 1;	\
+		const int32_t __off = 3 - ((start) / 32);			\
+		const int32_t __shft = (start) & 31;			\
+		uint32_t __res;						\
+									\
+		__res = (resp)[__off] >> __shft;			\
+		if (__size + __shft > 32)				\
+			__res |= (resp)[__off-1] << ((32 - __shft) % 32);	\
+		__res & __mask;						\
+	})
+
+
+/* Return the driver's only block device descriptor; dev is not looked at. */
+block_dev_desc_t *mmc_get_dev(int dev)
+{
+	block_dev_desc_t *desc = &mmc_blkdev;
+
+	return desc;
+}
+
+/*
+ * Program the SD_CLK divider for a target frequency of clk.
+ *
+ * The divider d written to the clock control register is the smallest value
+ * with get_sclk() / (2 * (d + 1)) <= clk, limited to 0xff.  Presumably the
+ * controller derives SD_CLK as SCLK / (2 * (d + 1)) -- confirm against the
+ * hardware reference.  The clock is switched off while the divider is being
+ * computed and re-enabled (CLK_E) together with the new divider.
+ */
+static void mci_set_clk(unsigned long clk)
+{
+	const unsigned long sclk = get_sclk();
+	const unsigned long twice = 2 * clk;
+	unsigned long divider;
+	__u16 ctl;
+
+	/* setting SD_CLK */
+	bfin_write_SDH_CLK_CTL(0);
+
+	divider = sclk / twice;
+	if (sclk % twice == 0)
+		divider -= 1;	/* exact multiple: one step less is enough */
+	if (divider > 0xff)
+		divider = 0xff;
+
+	ctl = (divider & 0xff) | CLK_E;
+	bfin_write_SDH_CLK_CTL(ctl);
+}
+
+/*
+ * Send one command to the card and collect its response.
+ *
+ * cmd:   command index, OR-ed into the command register value
+ * arg:   32-bit command argument
+ * resp:  response buffer of unsigned long words; word 0 is stored when
+ *        flags has MMC_RSP_PRESENT, words 1..3 as well with MMC_RSP_136.
+ *        Not touched when no response is expected.
+ * flags: MMC_RSP_xxx combination describing the expected response
+ *
+ * Busy-waits (no upper bound) until the controller reports sent, response
+ * end, timeout or CRC failure, then clears those status bits.
+ *
+ * Returns 0, -ETIMEDOUT on a command timeout, or -EILSEQ on a CRC failure
+ * when flags asks for a valid CRC (MMC_RSP_CRC).
+ */
+static int
+mmc_cmd(unsigned long cmd, unsigned long arg, void *resp, unsigned long flags)
+{
+	unsigned long *words = resp;
+	unsigned int ctl = cmd;
+	unsigned int stat;
+	int err = 0;
+
+	if (flags & MMC_RSP_PRESENT)
+		ctl |= CMD_RSP;
+	if (flags & MMC_RSP_136)
+		ctl |= CMD_L_RSP;
+
+	bfin_write_SDH_ARGUMENT(arg);
+	bfin_write_SDH_COMMAND(ctl | CMD_E);
+
+	/* wait for a while */
+	for (;;) {
+		udelay(1);
+		stat = bfin_read_SDH_STATUS();
+		if (stat & (CMD_SENT | CMD_RESP_END | CMD_TIME_OUT |
+			    CMD_CRC_FAIL))
+			break;
+	}
+
+	/* the response registers are copied out even if the command failed */
+	if (flags & MMC_RSP_PRESENT) {
+		words[0] = bfin_read_SDH_RESPONSE0();
+		if (flags & MMC_RSP_136) {
+			words[1] = bfin_read_SDH_RESPONSE1();
+			words[2] = bfin_read_SDH_RESPONSE2();
+			words[3] = bfin_read_SDH_RESPONSE3();
+		}
+	}
+
+	if (stat & CMD_TIME_OUT) {
+		printf("CMD%d timeout\n", (int)cmd);
+		err = -ETIMEDOUT;
+	} else if ((stat & CMD_CRC_FAIL) && (flags & MMC_RSP_CRC)) {
+		printf("CMD%d CRC failure\n", (int)cmd);
+		err = -EILSEQ;
+	}
+
+	bfin_write_SDH_STATUS_CLR(CMD_SENT_STAT | CMD_RESP_END_STAT |
+				CMD_TIMEOUT_STAT | CMD_CRC_FAIL_STAT);
+	return err;
+}
+
+/*
+ * Send an application-specific command: MMC_CMD_APP_CMD first, then cmd.
+ *
+ * Returns the error of the MMC_CMD_APP_CMD step if it fails, -ENODEV when
+ * its response does not have APP_CMD set with ILLEGAL_COMMAND clear, and
+ * otherwise whatever mmc_cmd() returns for cmd itself.  arg, resp and flags
+ * are passed through to that second mmc_cmd() call.
+ */
+static int
+mmc_acmd(unsigned long cmd, unsigned long arg, void *resp, unsigned long flags)
+{
+	unsigned long prefix_resp[4];
+	int err;
+
+	err = mmc_cmd(MMC_CMD_APP_CMD, 0, prefix_resp, MMC_RSP_PRESENT);
+	if (err)
+		return err;
+
+	/* the card must acknowledge the prefix without flagging it illegal */
+	if ((prefix_resp[0] & (ILLEGAL_COMMAND | APP_CMD)) != APP_CMD)
+		return -ENODEV;
+
+	return mmc_cmd(cmd, arg, resp, flags);
+}
+
+/*
+ * Block-device read hook: read blkcnt blocks starting at block start.
+ *
+ * Each block is transferred on its own: the DMA channel is pointed at the
+ * destination, the card is selected, the block length is set, a single
+ * block read is issued and the function polls until the data phase ends or
+ * fails; the card is deselected again after every successful block.
+ *
+ * dev is only used in the debug message.  Returns the number of blocks
+ * completed before the first failure (blkcnt on success).
+ */
+static unsigned long
+mmc_bread(int dev, unsigned long start, lbaint_t blkcnt, void *buffer)
+{
+	int ret, i;
+	unsigned long resp[4];
+	unsigned long card_status;
+	__u8 *buf = buffer;
+	__u32 status;
+	__u16 data_ctl = 0;
+	__u16 dma_cfg = 0;
+
+	if (blkcnt == 0)
+		return 0;
+	/* start is unsigned long and blkcnt an lbaint_t: print both as %lu */
+	debug("mmc_bread: dev %d, start %lu, blkcnt %lu\n",
+		dev, start, (unsigned long)blkcnt);
+	/* Force to use 512-byte block,because a lot of code depends on this */
+	data_ctl |= 9 << 4;
+	data_ctl |= DTX_DIR;
+	bfin_write_SDH_DATA_CTL(data_ctl);
+	dma_cfg |= WDSIZE_32 | RESTART | WNR | DMAEN;
+
+	/* FIXME later */
+	bfin_write_SDH_DATA_TIMER(0xFFFFFFFF);
+	for (i = 0; i < blkcnt; ++i, ++start) {
+		/* the DMA engine writes behind the cache's back: drop cached lines first */
+		blackfin_dcache_flush_invalidate_range(buf + i * mmc_blkdev.blksz,
+			buf + (i + 1) * mmc_blkdev.blksz);
+		bfin_write_DMA_START_ADDR(buf + i * mmc_blkdev.blksz);
+		bfin_write_DMA_X_COUNT(mmc_blkdev.blksz / 4);
+		bfin_write_DMA_X_MODIFY(4);
+		bfin_write_DMA_CONFIG(dma_cfg);
+		bfin_write_SDH_DATA_LGTH(mmc_blkdev.blksz);
+		/* Put the device into Transfer state */
+		ret = mmc_cmd(MMC_CMD_SELECT_CARD, mmc_rca << 16, resp, MMC_RSP_R1);
+		if (ret) {
+			printf("MMC_CMD_SELECT_CARD failed\n");
+			goto out;
+		}
+		/* Set block length */
+		ret = mmc_cmd(MMC_CMD_SET_BLOCKLEN, mmc_blkdev.blksz, resp, MMC_RSP_R1);
+		if (ret) {
+			printf("MMC_CMD_SET_BLOCKLEN failed\n");
+			goto out;
+		}
+		/* the read command takes a byte address */
+		ret = mmc_cmd(MMC_CMD_READ_SINGLE_BLOCK,
+			      start * mmc_blkdev.blksz, resp,
+			      MMC_RSP_R1);
+		if (ret) {
+			printf("MMC_CMD_READ_SINGLE_BLOCK failed\n");
+			goto out;
+		}
+		bfin_write_SDH_DATA_CTL(bfin_read_SDH_DATA_CTL() | DTX_DMA_E | DTX_E);
+
+		do {
+			udelay(1);
+			status = bfin_read_SDH_STATUS();
+		} while (!(status & (DAT_BLK_END | DAT_END | DAT_TIME_OUT | DAT_CRC_FAIL | RX_OVERRUN)));
+
+		if (status & (DAT_TIME_OUT | DAT_CRC_FAIL | RX_OVERRUN)) {
+			bfin_write_SDH_STATUS_CLR(DAT_TIMEOUT_STAT |
+				DAT_CRC_FAIL_STAT | RX_OVERRUN_STAT);
+			goto read_error;
+		} else {
+			bfin_write_SDH_STATUS_CLR(DAT_BLK_END_STAT | DAT_END_STAT);
+			/* deselect the card again */
+			mmc_cmd(MMC_CMD_SELECT_CARD, 0, resp, 0);
+		}
+	}
+ out:
+
+	/* i counts the blocks that completed */
+	return i;
+
+ read_error:
+	mmc_cmd(MMC_CMD_SEND_STATUS, mmc_rca << 16, &card_status, MMC_RSP_R1);
+	printf("mmc: bread failed, status = %08x, card status = %08lx\n",
+	       status, card_status);
+	goto out;
+}
+
+/*
+ * Block-device write hook: write blkcnt blocks starting at block start.
+ *
+ * Each block is transferred on its own: the source block is flushed out of
+ * the data cache, the DMA channel is pointed at it, the card is selected,
+ * the block length is set, a single block write is issued and the function
+ * polls until the data phase ends or fails; the card is deselected again
+ * after every successful block.
+ *
+ * dev is only used in the debug message.  Returns the number of blocks
+ * completed before the first failure (blkcnt on success).
+ */
+static unsigned long
+mmc_bwrite(int dev, unsigned long start, lbaint_t blkcnt, const void *buffer)
+{
+	int ret, i = 0;
+	unsigned long resp[4];
+	unsigned long card_status;
+	const __u8 *buf = buffer;
+	__u32 status;
+	__u16 data_ctl = 0;
+	__u16 dma_cfg = 0;
+
+	if (blkcnt == 0)
+		return 0;
+
+	debug("mmc_bwrite: dev %d, start %lx, blkcnt %lx\n",
+		 dev, start, (unsigned long)blkcnt);
+	/* Force to use 512-byte block,because a lot of code depends on this */
+	data_ctl |= 9 << 4;
+	data_ctl &= ~DTX_DIR;
+	bfin_write_SDH_DATA_CTL(data_ctl);
+	dma_cfg |= WDSIZE_32 | RESTART | DMAEN;
+	/* FIXME later */
+	bfin_write_SDH_DATA_TIMER(0xFFFFFFFF);
+	for (i = 0; i < blkcnt; ++i, ++start) {
+		/*
+		 * The DMA engine reads memory, not the cache: write the
+		 * block back first so the card gets what the caller stored
+		 * (mmc_bread() treats its buffer the same way).
+		 */
+		blackfin_dcache_flush_invalidate_range(
+			(void *)(buf + i * mmc_blkdev.blksz),
+			(void *)(buf + (i + 1) * mmc_blkdev.blksz));
+		bfin_write_DMA_START_ADDR(buf + i * mmc_blkdev.blksz);
+		bfin_write_DMA_X_COUNT(mmc_blkdev.blksz / 4);
+		bfin_write_DMA_X_MODIFY(4);
+		bfin_write_DMA_CONFIG(dma_cfg);
+		bfin_write_SDH_DATA_LGTH(mmc_blkdev.blksz);
+
+		/* Put the device into Transfer state */
+		ret = mmc_cmd(MMC_CMD_SELECT_CARD, mmc_rca << 16, resp, MMC_RSP_R1);
+		if (ret) {
+			printf("MMC_CMD_SELECT_CARD failed\n");
+			goto out;
+		}
+		/* Set block length */
+		ret = mmc_cmd(MMC_CMD_SET_BLOCKLEN, mmc_blkdev.blksz, resp, MMC_RSP_R1);
+		if (ret) {
+			printf("MMC_CMD_SET_BLOCKLEN failed\n");
+			goto out;
+		}
+		/* the write command takes a byte address */
+		ret = mmc_cmd(MMC_CMD_WRITE_BLOCK,
+			      start * mmc_blkdev.blksz, resp,
+			      MMC_RSP_R1);
+		if (ret) {
+			printf("MMC_CMD_WRITE_SINGLE_BLOCK failed\n");
+			goto out;
+		}
+		bfin_write_SDH_DATA_CTL(bfin_read_SDH_DATA_CTL() | DTX_DMA_E | DTX_E);
+
+		do {
+			udelay(1);
+			status = bfin_read_SDH_STATUS();
+		} while (!(status & (DAT_BLK_END | DAT_END | DAT_TIME_OUT | DAT_CRC_FAIL | TX_UNDERRUN)));
+
+		if (status & (DAT_TIME_OUT | DAT_CRC_FAIL | TX_UNDERRUN)) {
+			bfin_write_SDH_STATUS_CLR(DAT_TIMEOUT_STAT |
+				DAT_CRC_FAIL_STAT | TX_UNDERRUN_STAT);
+			goto write_error;
+		} else {
+			bfin_write_SDH_STATUS_CLR(DAT_BLK_END_STAT | DAT_END_STAT);
+			/* deselect the card again */
+			mmc_cmd(MMC_CMD_SELECT_CARD, 0, resp, 0);
+		}
+	}
+ out:
+	/* i counts the blocks that completed */
+	return i;
+
+ write_error:
+	mmc_cmd(MMC_CMD_SEND_STATUS, mmc_rca << 16, &card_status, MMC_RSP_R1);
+	printf("mmc: bwrite failed, status = %08x, card status = %08lx\n",
+	       status, card_status);
+	goto out;
+}
+
+/*
+ * Unpack a CID response using the MMC field layout.
+ *
+ * resp holds the four response words, resp[0] being the most significant.
+ * The product name takes six characters here: the low byte of word 0, all
+ * of word 1 and the top byte of word 2.
+ */
+static void mmc_parse_cid(struct mmc_cid *cid, unsigned long *resp)
+{
+	const unsigned long w0 = resp[0];
+	const unsigned long w1 = resp[1];
+	const unsigned long w2 = resp[2];
+	const unsigned long w3 = resp[3];
+	int k;
+
+	cid->mid = w0 >> 24;
+	cid->oid = (w0 >> 8) & 0xffff;
+
+	cid->pnm[0] = w0;
+	/* word 1 supplies characters 1..4, most significant byte first */
+	for (k = 0; k < 4; k++)
+		cid->pnm[1 + k] = w1 >> (24 - 8 * k);
+	cid->pnm[5] = w2 >> 24;
+	cid->pnm[6] = 0;
+
+	cid->prv = w2 >> 16;
+	cid->psn = (w2 << 16) | (w3 >> 16);
+	cid->mdt = w3 >> 8;
+}
+
+/*
+ * Unpack a CID response using the SD field layout.
+ *
+ * resp holds the four response words, resp[0] being the most significant.
+ * Unlike the MMC layout the product name has five characters, so revision,
+ * serial number and date sit one byte higher and the date is 12 bits wide.
+ */
+static void sd_parse_cid(struct mmc_cid *cid, unsigned long *resp)
+{
+	const unsigned long w0 = resp[0];
+	const unsigned long w1 = resp[1];
+	const unsigned long w2 = resp[2];
+	const unsigned long w3 = resp[3];
+	int k;
+
+	cid->mid = w0 >> 24;
+	cid->oid = (w0 >> 8) & 0xffff;
+
+	cid->pnm[0] = w0;
+	/* word 1 supplies characters 1..4, most significant byte first */
+	for (k = 0; k < 4; k++)
+		cid->pnm[1 + k] = w1 >> (24 - 8 * k);
+	cid->pnm[5] = 0;
+	cid->pnm[6] = 0;
+
+	cid->prv = w2 >> 24;
+	cid->psn = (w2 << 8) | (w3 >> 24);
+	cid->mdt = (w3 >> 8) & 0x0fff;
+}
+
+/*
+ * Print the parsed CID fields, one per line.  Revision and date are each
+ * shown as two numbers: the value above the low nibble, then the low nibble.
+ */
+static void mmc_dump_cid(const struct mmc_cid *cid)
+{
+	const unsigned int rev_hi = cid->prv >> 4;
+	const unsigned int rev_lo = cid->prv & 0x0f;
+	const unsigned int date_hi = cid->mdt >> 4;
+	const unsigned int date_lo = cid->mdt & 0x0f;
+
+	printf("CID information:\n");
+	printf("Manufacturer ID:       %02X\n", cid->mid);
+	printf("OEM/Application ID:    %04X\n", cid->oid);
+	printf("Product name:          %s\n", cid->pnm);
+	printf("Product Revision:      %u.%u\n", rev_hi, rev_lo);
+	printf("Product Serial Number: %lu\n", cid->psn);
+	printf("Manufacturing Date:    %02u/%02u\n", date_hi, date_lo);
+}
+
+/*
+ * Print selected fields of the raw CSD (four words, csd[0] most
+ * significant).  The capacity line multiplies (field at bit 62, 12 bits,
+ * plus one) by 2^(field at bit 47, 3 bits, plus two) by the read block
+ * length.
+ */
+static void mmc_dump_csd(__u32 *csd)
+{
+	const unsigned int rd_len = get_bits(csd, 80, 4);
+	const unsigned int c_size = get_bits(csd, 62, 12);
+	const unsigned int c_mult = get_bits(csd, 47, 3);
+	const char *speed;
+
+	if (get_bits(csd, 96, 8) == 0x32)
+		speed = "25MHz";
+	else
+		speed = "50MHz";
+
+	printf("CSD information:\n");
+	printf("CSD structure version:   1.%u\n", get_bits(csd, 126, 2));
+	printf("Card command classes:    %03x\n", get_bits(csd, 84, 12));
+	printf("Max trans speed: %s\n", speed);
+	printf("Read block length:       %d\n", 1 << rd_len);
+	printf("Write block length:      %u\n", 1 << get_bits(csd, 22, 4));
+	printf("Card capacity:		%u bytes\n",
+	       (c_size + 1) * (1 << (c_mult + 2)) * (1 << rd_len));
+	putc('\n');
+}
+
+/*
+ * Reset all cards: MMC_CMD_GO_IDLE_STATE is sent twice, 500us apart.
+ * Returns the first non-zero mmc_cmd() result, or 0.
+ */
+static int mmc_idle_cards(void)
+{
+	int err = mmc_cmd(MMC_CMD_GO_IDLE_STATE, 0, NULL, 0);
+
+	if (!err) {
+		udelay(500);
+		err = mmc_cmd(MMC_CMD_GO_IDLE_STATE, 0, NULL, 0);
+	}
+	return err;
+}
+
+/*
+ * Try to initialize the card as an SD card.
+ *
+ * After resetting the cards, SD_CMD_APP_SEND_OP_COND is repeated (at most
+ * 1000 times) until bit 31 of its response is set.  The CID is then read
+ * and parsed into *cid (and printed when verbose), and the card's relative
+ * address is fetched into mmc_rca.
+ *
+ * Returns 0 on success and marks the card as SD; returns the failing
+ * command's error, or -ETIMEDOUT when bit 31 never shows up.
+ */
+static int sd_init_card(struct mmc_cid *cid, int verbose)
+{
+	unsigned long resp[4];
+	int attempts = 1000;
+	int err;
+
+	mmc_idle_cards();
+
+	do {
+		err = mmc_acmd(SD_CMD_APP_SEND_OP_COND, CONFIG_SYS_MMC_OP_COND,
+			       resp, MMC_RSP_R3);
+		if (err)
+			return err;
+		if (resp[0] & 0x80000000)
+			break;
+		err = -ETIMEDOUT;
+	} while (--attempts);
+	if (err)
+		return err;
+
+	err = mmc_cmd(MMC_CMD_ALL_SEND_CID, 0, resp, MMC_RSP_R2);
+	if (err)
+		return err;
+	sd_parse_cid(cid, resp);
+	if (verbose)
+		mmc_dump_cid(cid);
+
+	/* Get RCA of the card that responded */
+	err = mmc_cmd(SD_CMD_SEND_RELATIVE_ADDR, 0, resp, MMC_RSP_R6);
+	if (err)
+		return err;
+
+	/* the address arrives in the upper half of the response word */
+	mmc_rca = (resp[0] >> 16) & 0xffff;
+	if (verbose)
+		printf("SD Card detected (RCA %u)\n", mmc_rca);
+	mmc_card_is_sd = 1;
+	return 0;
+}
+
+/*
+ * Initialize the card as an MMC card.
+ *
+ * After resetting the cards, MMC_CMD_SEND_OP_COND is repeated (at most 1000
+ * times) until bit 31 of its response is set.  The CID is then read and
+ * parsed into *cid (and printed when verbose), and the card is given the
+ * relative address currently held in mmc_rca.
+ *
+ * Returns 0 on success, the failing command's error, or -ETIMEDOUT when
+ * bit 31 never shows up.
+ */
+static int mmc_init_card(struct mmc_cid *cid, int verbose)
+{
+	unsigned long resp[4];
+	int attempts = 1000;
+	int err;
+
+	mmc_idle_cards();
+
+	do {
+		err = mmc_cmd(MMC_CMD_SEND_OP_COND, CONFIG_SYS_MMC_OP_COND, resp,
+			      MMC_RSP_R3);
+		if (err)
+			return err;
+		if (resp[0] & 0x80000000)
+			break;
+		err = -ETIMEDOUT;
+	} while (--attempts);
+	if (err)
+		return err;
+
+	/* Get CID of all cards. FIXME: Support more than one card */
+	err = mmc_cmd(MMC_CMD_ALL_SEND_CID, 0, resp, MMC_RSP_R2);
+	if (err)
+		return err;
+	mmc_parse_cid(cid, resp);
+	if (verbose)
+		mmc_dump_cid(cid);
+
+	/* Set Relative Address of the card that responded */
+	return mmc_cmd(MMC_CMD_SET_RELATIVE_ADDR, mmc_rca << 16, resp,
+		       MMC_RSP_R1);
+}
+
+/*
+ * Bring up the SDH controller and the inserted card, then fill in the
+ * block device that mmc_get_dev() hands out.
+ *
+ * SD initialization is tried first; when it fails the card is initialized
+ * as MMC with MMC_DEFAULT_RCA.  With verbose set, the CID and CSD contents
+ * are printed.
+ *
+ * Returns 0 on success, the error of the failing command, or -ENODEV when
+ * the CSD says the card cannot do 512-byte reads.
+ */
+int mmc_init(int verbose)
+{
+	__u16 pwr_ctl = 0;
+	int ret;
+	unsigned int max_blksz;
+	/* Initialize sdh controller */
+#if defined(__ADSPBF54x__)
+	bfin_write_DMAC1_PERIMUX(bfin_read_DMAC1_PERIMUX() | 0x1);
+	bfin_write_PORTC_FER(bfin_read_PORTC_FER() | 0x3F00);
+	bfin_write_PORTC_MUX(bfin_read_PORTC_MUX() & ~0xFFF0000);
+#elif defined(__ADSPBF51x__)
+	bfin_write_PORTG_FER(bfin_read_PORTG_FER() | 0x01F8);
+	bfin_write_PORTG_MUX((bfin_read_PORTG_MUX() & ~0x3FC) | 0x154);
+#else
+# error no portmux for this proc yet
+#endif
+	bfin_write_SDH_CFG(bfin_read_SDH_CFG() | CLKS_EN);
+	/* Disable card detect pin */
+	bfin_write_SDH_CFG((bfin_read_SDH_CFG() & 0x1F) | 0x60);
+	/* identification runs at the slow clock */
+	mci_set_clk(CONFIG_SYS_MMC_CLK_ID);
+	/* setting power control */
+	pwr_ctl |= ROD_CTL;
+	pwr_ctl |= PWR_ON;
+	bfin_write_SDH_PWR_CTL(pwr_ctl);
+	mmc_card_is_sd = 0;
+	ret = sd_init_card(&cid, verbose);
+	if (ret) {
+		/* not an SD card: retry as MMC, which is assigned its address */
+		mmc_rca = MMC_DEFAULT_RCA;
+		ret = mmc_init_card(&cid, verbose);
+	}
+	if (ret)
+		return ret;
+	/* Get CSD from the card */
+	ret = mmc_cmd(MMC_CMD_SEND_CSD, mmc_rca << 16, csd, MMC_RSP_R2);
+	if (ret)
+		return ret;
+	if (verbose)
+		mmc_dump_csd(csd);
+	/* Initialize the blockdev structure */
+	mmc_blkdev.if_type = IF_TYPE_MMC;
+	mmc_blkdev.part_type = PART_TYPE_DOS;
+	mmc_blkdev.block_read = mmc_bread;
+	mmc_blkdev.block_write = mmc_bwrite;
+	sprintf(mmc_blkdev.vendor,
+		"Man %02x%04x Snr %08lx",
+		cid.mid, cid.oid, cid.psn);
+	strncpy(mmc_blkdev.product, cid.pnm,
+		sizeof(mmc_blkdev.product));
+	sprintf(mmc_blkdev.revision, "%x %x",
+		cid.prv >> 4, cid.prv & 0x0f);
+
+	max_blksz = 1 << get_bits(csd, 80, 4);
+	/*
+	 * If we can't use 512 byte blocks, refuse to deal with the
+	 * card. Tons of code elsewhere seems to depend on this.
+	 */
+	if (max_blksz < 512 || (max_blksz > 512 && !get_bits(csd, 79, 1))) {
+		printf("Card does not support 512 byte reads, aborting.\n");
+		return -ENODEV;
+	}
+
+	mmc_blkdev.blksz = 512;
+	/*
+	 * The CSD size fields count blocks of max_blksz bytes (the same
+	 * product mmc_dump_csd() multiplies by the read block length to get
+	 * bytes).  blksz is fixed at 512 above, so scale the count to
+	 * 512-byte sectors; max_blksz >= 512 is guaranteed by the check
+	 * above and the factor is 1 for cards with 512-byte blocks.
+	 */
+	mmc_blkdev.lba = (get_bits(csd, 62, 12) + 1) *
+			 (1 << (get_bits(csd, 47, 3) + 2)) *
+			 (max_blksz / 512);
+	/* initialization done: switch to the operating clock */
+	mci_set_clk(CONFIG_SYS_MMC_CLK_OP);
+	init_part(&mmc_blkdev);
+	return 0;
+}
+
+/* Not implemented by this driver: always fails with -ENOSYS. */
+int mmc_read(ulong src, uchar *dst, int size)
+{
+	const int unsupported = -ENOSYS;
+
+	return unsupported;
+}
+
+/* Not implemented by this driver: always fails with -ENOSYS. */
+int mmc_write(uchar *src, ulong dst, int size)
+{
+	const int unsupported = -ENOSYS;
+
+	return unsupported;
+}
+
+/* Stub: returns 0 for every address. */
+int mmc2info(ulong addr)
+{
+	const int result = 0;
+
+	return result;
+}
diff --git a/drivers/mmc/bfin_sdh.h b/drivers/mmc/bfin_sdh.h
new file mode 100644
index 0000000..793ec30
--- /dev/null
+++ b/drivers/mmc/bfin_sdh.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2008 Analog Devices Inc.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+#ifndef __BLACKFIN_SDH_H__
+#define __BLACKFIN_SDH_H__
+
+#define MMC_RSP_PRESENT	(1 << 0)		/* a response is expected; mmc_cmd() then stores response word 0 */
+#define MMC_RSP_136	(1 << 1)		/* 136 bit response */
+#define MMC_RSP_CRC	(1 << 2)		/* expect valid crc */
+#define MMC_RSP_BUSY	(1 << 3)		/* card may send busy */
+#define MMC_RSP_OPCODE	(1 << 4)		/* response contains opcode */
+
+#define MMC_CMD_MASK	(3 << 5)		/* non-SPI command type */
+#define MMC_CMD_AC	(0 << 5)
+#define MMC_CMD_ADTC	(1 << 5)
+#define MMC_CMD_BC	(2 << 5)
+#define MMC_CMD_BCR	(3 << 5)
+
+#define MMC_RSP_SPI_S1	(1 << 7)		/* one status byte */
+#define MMC_RSP_SPI_S2	(1 << 8)		/* second byte */
+#define MMC_RSP_SPI_B4	(1 << 9)		/* four data bytes */
+#define MMC_RSP_SPI_BUSY (1 << 10)		/* card may send busy */
+
+/*
+ * These are the native response types, and correspond to valid bit
+ * patterns of the above flags.  One additional valid pattern
+ * is all zeros, which means we don't expect a response.
+ */
+#define MMC_RSP_NONE	(0)
+#define MMC_RSP_R1	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE)
+#define MMC_RSP_R1B	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY)
+#define MMC_RSP_R2	(MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC)
+#define MMC_RSP_R3	(MMC_RSP_PRESENT)
+#define MMC_RSP_R4	(MMC_RSP_PRESENT)
+#define MMC_RSP_R5	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE)
+#define MMC_RSP_R6	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE)
+#define MMC_RSP_R7	(MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE)
+#define ILLEGAL_COMMAND  (1 << 22)	/* response bit that mmc_acmd() requires to be clear after MMC_CMD_APP_CMD */
+#define APP_CMD		 (1 << 5)	/* response bit that mmc_acmd() requires to be set after MMC_CMD_APP_CMD */
+
+#endif
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index f7b2b22..24edb27 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -35,6 +35,7 @@
 COBJS-y += nand_util.o
 endif
 
+COBJS-$(CONFIG_DRIVER_NAND_BFIN) += bfin_nand.o
 COBJS-$(CONFIG_NAND_FSL_ELBC) += fsl_elbc_nand.o
 COBJS-$(CONFIG_NAND_FSL_UPM) += fsl_upm.o
 COBJS-$(CONFIG_NAND_S3C64XX) += s3c64xx.o
diff --git a/drivers/mtd/nand/bfin_nand.c b/drivers/mtd/nand/bfin_nand.c
new file mode 100644
index 0000000..f6a0835
--- /dev/null
+++ b/drivers/mtd/nand/bfin_nand.c
@@ -0,0 +1,385 @@
+/*
+ * Driver for Blackfin on-chip NAND controller.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Copyright (c) 2007-2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+/* TODO:
+ * - move bit defines into mach-common/bits/nand.h
+ * - try and replace all IRQSTAT usage with STAT polling
+ * - have software ecc mode use same algo as hw ecc ?
+ */
+
+#include <common.h>
+#include <asm/io.h>
+
+#ifdef DEBUG
+# define pr_stamp() printf("%s:%s:%i: here i am\n", __FILE__, __func__, __LINE__)
+#else
+# define pr_stamp()
+#endif
+
+#include <nand.h>
+
+#include <asm/blackfin.h>
+
+/* Bit masks for NFC_CTL */
+
+#define                    WR_DLY  0xf        /* Write Strobe Delay */
+#define                    RD_DLY  0xf0       /* Read Strobe Delay */
+#define                    NWIDTH  0x100      /* NAND Data Width */
+#define                   PG_SIZE  0x200      /* Page Size */
+
+/* Bit masks for NFC_STAT */
+
+#define                     NBUSY  0x1        /* Not Busy */
+#define                   WB_FULL  0x2        /* Write Buffer Full */
+#define                PG_WR_STAT  0x4        /* Page Write Pending */
+#define                PG_RD_STAT  0x8        /* Page Read Pending */
+#define                  WB_EMPTY  0x10       /* Write Buffer Empty */
+
+/* Bit masks for NFC_IRQSTAT */
+
+#define                  NBUSYIRQ  0x1        /* Not Busy IRQ */
+#define                    WB_OVF  0x2        /* Write Buffer Overflow */
+#define                   WB_EDGE  0x4        /* Write Buffer Edge Detect */
+#define                    RD_RDY  0x8        /* Read Data Ready */
+#define                   WR_DONE  0x10       /* Page Write Done */
+
+#define NAND_IS_512() (CONFIG_BFIN_NFC_CTL_VAL & 0x200)
+
+/*
+ * hardware specific access to control-lines
+ */
+static void bfin_nfc_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+{
+	pr_stamp();
+
+	if (cmd == NAND_CMD_NONE)
+		return;
+
+	while (bfin_read_NFC_STAT() & WB_FULL)
+		continue;
+
+	if (ctrl & NAND_CLE)
+		bfin_write_NFC_CMD(cmd);
+	else
+		bfin_write_NFC_ADDR(cmd);
+	SSYNC();
+}
+
+int bfin_nfc_devready(struct mtd_info *mtd)
+{
+	pr_stamp();
+	return (bfin_read_NFC_STAT() & NBUSY ? 1 : 0);
+}
+
+/*
+ * PIO mode for buffer writing and reading
+ */
+static void bfin_nfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	pr_stamp();
+
+	int i;
+
+	/*
+	 * Data reads are requested by first writing to NFC_DATA_RD
+	* and then reading back from NFC_READ.
+	*/
+	for (i = 0; i < len; ++i) {
+		while (bfin_read_NFC_STAT() & WB_FULL)
+			if (ctrlc())
+				return;
+
+		/* Contents do not matter */
+		bfin_write_NFC_DATA_RD(0x0000);
+
+		while (!(bfin_read_NFC_IRQSTAT() & RD_RDY))
+			if (ctrlc())
+				return;
+
+		buf[i] = bfin_read_NFC_READ();
+
+		bfin_write_NFC_IRQSTAT(RD_RDY);
+	}
+}
+
+/* Read one byte from the NAND through the PIO read path (bfin_nfc_read_buf) */
+static uint8_t bfin_nfc_read_byte(struct mtd_info *mtd)
+{
+	uint8_t val = 0;	/* stays defined if read_buf bails out on ctrlc() */
+	pr_stamp();
+	bfin_nfc_read_buf(mtd, &val, 1);
+	return val;
+}
+
+static void bfin_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+{
+	pr_stamp();
+
+	int i;
+
+	for (i = 0; i < len; ++i) {
+		while (bfin_read_NFC_STAT() & WB_FULL)
+			if (ctrlc())
+				return;
+
+		bfin_write_NFC_DATA_WR(buf[i]);
+	}
+}
+
+/*
+ * ECC functions
+ * These allow the bfin to use the controller's ECC
+ * generator block to ECC the data as it passes through
+ */
+
+/* Check the 256 bytes at dat against their 3-byte ECC and flip the one bad
+ * data bit when the syndrome pinpoints it.  Returns 0 if no error is detected
+ * or one data bit was corrected, 1 otherwise. */
+static int bfin_nfc_correct_data_256(struct mtd_info *mtd, u_char *dat,
+					u_char *read_ecc, u_char *calc_ecc)
+{
+	u32 syndrome[5];
+	u32 calced, stored;
+	unsigned short failing_bit, failing_byte;
+	u_char data;
+
+	pr_stamp();
+
+	calced = calc_ecc[0] | (calc_ecc[1] << 8) | (calc_ecc[2] << 16);
+	stored = read_ecc[0] | (read_ecc[1] << 8) | (read_ecc[2] << 16);
+
+	syndrome[0] = (calced ^ stored);
+
+	/*
+	 * syndrome 0: all zero (an all-zero calced or stored word is
+	 * also treated as "no error in data")
+	 * No action
+	 */
+	if (!syndrome[0] || !calced || !stored)
+		return 0;
+
+	/*
+	 * syndrome 0: only one bit is one
+	 * ECC data was incorrect
+	 * No action
+	 */
+	if (hweight32(syndrome[0]) == 1)
+		return 1;
+
+	syndrome[1] = (calced & 0x7FF) ^ (stored & 0x7FF);
+	syndrome[2] = (calced & 0x7FF) ^ ((calced >> 11) & 0x7FF);
+	syndrome[3] = (stored & 0x7FF) ^ ((stored >> 11) & 0x7FF);
+	syndrome[4] = syndrome[2] ^ syndrome[3];
+
+	/*
+	 * syndrome 0: exactly 11 bits are one, each parity
+	 * and parity' pair is 1 & 0 or 0 & 1.
+	 * 1-bit correctable error
+	 * Correct the error
+	 */
+	if (hweight32(syndrome[0]) == 11 && syndrome[4] == 0x7FF) {
+		failing_bit = syndrome[1] & 0x7;	/* low 3 bits: bit in byte */
+		failing_byte = syndrome[1] >> 0x3;	/* upper bits: byte offset */
+		data = *(dat + failing_byte);
+		data = data ^ (0x1 << failing_bit);
+		*(dat + failing_byte) = data;
+
+		return 0;
+	}
+
+	/*
+	 * syndrome 0: random data
+	 * More than 1-bit error, non-correctable error
+	 * Discard data, mark bad block
+	 */
+
+	return 1;
+}
+
+/* ECC-check each 256-byte half of the chunk at dat; returns both results OR'ed */
+static int bfin_nfc_correct_data(struct mtd_info *mtd, u_char *dat,
+					u_char *read_ecc, u_char *calc_ecc)
+{
+	int ret;
+
+	pr_stamp();
+	ret = bfin_nfc_correct_data_256(mtd, dat, read_ecc, calc_ecc);
+
+	/* 512 byte page: second half's 3 ECC bytes sit at offset 3 (6 total) */
+	if (NAND_IS_512()) {
+		dat += 256;
+		read_ecc += 3;
+		calc_ecc += 3;
+		ret |= bfin_nfc_correct_data_256(mtd, dat, read_ecc, calc_ecc);
+	}
+
+	return ret;
+}
+
+static void reset_ecc(void)
+{
+	bfin_write_NFC_RST(0x1);
+	while (bfin_read_NFC_RST() & 1)
+		continue;
+}
+
+static void bfin_nfc_enable_hwecc(struct mtd_info *mtd, int mode)
+{
+	reset_ecc();
+}
+
+static int bfin_nfc_calculate_ecc(struct mtd_info *mtd,
+		const u_char *dat, u_char *ecc_code)
+{
+	u16 ecc0, ecc1;
+	u32 code[2];
+	u8 *p;
+
+	pr_stamp();
+
+	/* first 4 bytes ECC code for 256 page size */
+	ecc0 = bfin_read_NFC_ECC0();
+	ecc1 = bfin_read_NFC_ECC1();
+
+	code[0] = (ecc0 & 0x7FF) | ((ecc1 & 0x7FF) << 11);
+
+	/* first 3 bytes in ecc_code for 256 page size */
+	p = (u8 *) code;
+	memcpy(ecc_code, p, 3);
+
+	/* second 4 bytes ECC code for 512 page size */
+	if (NAND_IS_512()) {
+		ecc0 = bfin_read_NFC_ECC2();
+		ecc1 = bfin_read_NFC_ECC3();
+		code[1] = (ecc0 & 0x7FF) | ((ecc1 & 0x7FF) << 11);
+
+		/* second 3 bytes in ecc_code for second 256
+		 * bytes of 512 page size
+		 */
+		p = (u8 *) (code + 1);
+		memcpy((ecc_code + 3), p, 3);
+	}
+
+	reset_ecc();
+
+	return 0;
+}
+
+#ifdef CONFIG_BFIN_NFC_BOOTROM_ECC
+# define BOOTROM_ECC 1
+#else
+# define BOOTROM_ECC 0
+#endif
+
+static uint8_t bbt_pattern[] = { 0xff };
+
+static struct nand_bbt_descr bootrom_bbt = {
+	.options = 0,
+	.offs = 63,
+	.len = 1,
+	.pattern = bbt_pattern,
+};
+
+static struct nand_ecclayout bootrom_ecclayout = {
+	.eccbytes = 24,
+	.eccpos = {
+		0x8 * 0, 0x8 * 0 + 1, 0x8 * 0 + 2,
+		0x8 * 1, 0x8 * 1 + 1, 0x8 * 1 + 2,
+		0x8 * 2, 0x8 * 2 + 1, 0x8 * 2 + 2,
+		0x8 * 3, 0x8 * 3 + 1, 0x8 * 3 + 2,
+		0x8 * 4, 0x8 * 4 + 1, 0x8 * 4 + 2,
+		0x8 * 5, 0x8 * 5 + 1, 0x8 * 5 + 2,
+		0x8 * 6, 0x8 * 6 + 1, 0x8 * 6 + 2,
+		0x8 * 7, 0x8 * 7 + 1, 0x8 * 7 + 2
+	},
+	.oobfree = {
+		{ 0x8 * 0 + 3, 5 },
+		{ 0x8 * 1 + 3, 5 },
+		{ 0x8 * 2 + 3, 5 },
+		{ 0x8 * 3 + 3, 5 },
+		{ 0x8 * 4 + 3, 5 },
+		{ 0x8 * 5 + 3, 5 },
+		{ 0x8 * 6 + 3, 5 },
+		{ 0x8 * 7 + 3, 5 },
+	}
+};
+
+/*
+ * Board-specific NAND initialization. The following members of the
+ * argument are board-specific (per include/linux/mtd/nand.h):
+ * - IO_ADDR_R?: address to read the 8 I/O lines of the flash device
+ * - IO_ADDR_W?: address to write the 8 I/O lines of the flash device
+ * - cmd_ctrl: hardware-specific function for accessing control lines
+ * - dev_ready: hardware-specific function for accessing device ready/busy line
+ * - enable_hwecc?: function to enable (reset) hardware ecc generator. Must
+ *   only be provided if a hardware ECC is available
+ * - ecc.mode: mode of ecc, see defines
+ * - chip_delay: chip dependent delay for transferring data from array to
+ *   read regs (tR)
+ * - options: various chip options. They can partly be set to inform
+ *   nand_scan about special functionality. See the defines for further
+ *   explanation
+ * Members with a "?" were not set in the merged testing-NAND branch,
+ * so they are not set here either.
+ */
+int board_nand_init(struct nand_chip *chip)
+{
+	pr_stamp();
+
+	/* set width/ecc/timings/etc... */
+	bfin_write_NFC_CTL(CONFIG_BFIN_NFC_CTL_VAL);
+
+	/* clear interrupt status */
+	bfin_write_NFC_IRQMASK(0x0);
+	bfin_write_NFC_IRQSTAT(0xffff);
+
+	/* enable GPIO function enable register */
+#ifdef __ADSPBF54x__
+	bfin_write_PORTJ_FER(bfin_read_PORTJ_FER() | 6);
+#elif defined(__ADSPBF52x__)
+	bfin_write_PORTH_FER(bfin_read_PORTH_FER() | 0xFCFF);
+	bfin_write_PORTH_MUX(0);
+#else
+# error no support for this variant
+#endif
+
+	chip->cmd_ctrl = bfin_nfc_cmd_ctrl;
+	chip->read_buf = bfin_nfc_read_buf;
+	chip->write_buf = bfin_nfc_write_buf;
+	chip->read_byte = bfin_nfc_read_byte;
+
+#ifdef CONFIG_BFIN_NFC_NO_HW_ECC
+# define ECC_HW 0
+#else
+# define ECC_HW 1
+#endif
+	if (ECC_HW) {
+		if (BOOTROM_ECC) {
+			chip->badblock_pattern = &bootrom_bbt;
+			chip->ecc.layout = &bootrom_ecclayout;
+		}
+		if (!NAND_IS_512()) {
+			chip->ecc.bytes = 3;
+			chip->ecc.size = 256;
+		} else {
+			chip->ecc.bytes = 6;
+			chip->ecc.size = 512;
+		}
+		chip->ecc.mode = NAND_ECC_HW;
+		chip->ecc.calculate = bfin_nfc_calculate_ecc;
+		chip->ecc.correct   = bfin_nfc_correct_data;
+		chip->ecc.hwctl     = bfin_nfc_enable_hwecc;
+	} else
+		chip->ecc.mode = NAND_ECC_SOFT;
+	chip->dev_ready = bfin_nfc_devready;
+	chip->chip_delay = 0;
+
+	return 0;
+}
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index dddbb78..23f934a 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -12,6 +12,8 @@
 #include <netdev.h>
 #include <command.h>
 #include <malloc.h>
+#include <miiphy.h>
+#include <linux/mii.h>
 
 #include <asm/blackfin.h>
 #include <asm/mach-common/bits/dma.h>
@@ -20,16 +22,15 @@
 
 #include "bfin_mac.h"
 
-#ifdef CONFIG_POST
-#include <post.h>
+#ifndef CONFIG_PHY_ADDR
+# define CONFIG_PHY_ADDR 1
+#endif
+#ifndef CONFIG_PHY_CLOCK_FREQ
+# define CONFIG_PHY_CLOCK_FREQ 2500000
 #endif
 
-#undef DEBUG_ETHERNET
-
-#ifdef DEBUG_ETHERNET
-#define DEBUGF(fmt, args...) printf(fmt, ##args)
-#else
-#define DEBUGF(fmt, args...)
+#ifdef CONFIG_POST
+#include <post.h>
 #endif
 
 #define RXBUF_BASE_ADDR		0xFF900000
@@ -38,42 +39,61 @@
 
 #define TOUT_LOOP		1000000
 
-ADI_ETHER_BUFFER *txbuf[TX_BUF_CNT];
-ADI_ETHER_BUFFER *rxbuf[PKTBUFSRX];
+static ADI_ETHER_BUFFER *txbuf[TX_BUF_CNT];
+static ADI_ETHER_BUFFER *rxbuf[PKTBUFSRX];
 static u16 txIdx;		/* index of the current RX buffer */
 static u16 rxIdx;		/* index of the current TX buffer */
 
-u16 PHYregs[NO_PHY_REGS];	/* u16 PHYADDR; */
-
 /* DMAx_CONFIG values at DMA Restart */
-const ADI_DMA_CONFIG_REG rxdmacfg = {
-	.b_DMA_EN  = 1,	/* enabled */
-	.b_WNR     = 1,	/* write to memory */
-	.b_WDSIZE  = 2,	/* wordsize is 32 bits */
-	.b_DMA2D   = 0,
-	.b_RESTART = 0,
-	.b_DI_SEL  = 0,
-	.b_DI_EN   = 0,	/* no interrupt */
-	.b_NDSIZE  = 5,	/* 5 half words is desc size */
-	.b_FLOW    = 7	/* large desc flow */
+static const union {
+	u16 data;
+	ADI_DMA_CONFIG_REG reg;
+} txdmacfg = {
+	.reg = {
+		.b_DMA_EN  = 1,	/* enabled */
+		.b_WNR     = 0,	/* read from memory */
+		.b_WDSIZE  = 2,	/* wordsize is 32 bits */
+		.b_DMA2D   = 0,
+		.b_RESTART = 0,
+		.b_DI_SEL  = 0,
+		.b_DI_EN   = 0,	/* no interrupt */
+		.b_NDSIZE  = 5,	/* 5 half words is desc size */
+		.b_FLOW    = 7	/* large desc flow */
+	},
 };
 
-const ADI_DMA_CONFIG_REG txdmacfg = {
-	.b_DMA_EN  = 1,	/* enabled */
-	.b_WNR     = 0,	/* read from memory */
-	.b_WDSIZE  = 2,	/* wordsize is 32 bits */
-	.b_DMA2D   = 0,
-	.b_RESTART = 0,
-	.b_DI_SEL  = 0,
-	.b_DI_EN   = 0,	/* no interrupt */
-	.b_NDSIZE  = 5,	/* 5 half words is desc size */
-	.b_FLOW    = 7	/* large desc flow */
-};
+static int bfin_miiphy_wait(void)
+{
+	/* poll the STABUSY bit */
+	while (bfin_read_EMAC_STAADD() & STABUSY)
+		continue;
+	return 0;
+}
+
+static int bfin_miiphy_read(char *devname, uchar addr, uchar reg, ushort *val)
+{
+	if (bfin_miiphy_wait())
+		return 1;
+	bfin_write_EMAC_STAADD(SET_PHYAD(addr) | SET_REGAD(reg) | STABUSY);
+	if (bfin_miiphy_wait())
+		return 1;
+	*val = bfin_read_EMAC_STADAT();
+	return 0;
+}
+
+static int bfin_miiphy_write(char *devname, uchar addr, uchar reg, ushort val)
+{
+	if (bfin_miiphy_wait())
+		return 1;
+	bfin_write_EMAC_STADAT(val);
+	bfin_write_EMAC_STAADD(SET_PHYAD(addr) | SET_REGAD(reg) | STAOP | STABUSY);
+	return 0;
+}
 
 int bfin_EMAC_initialize(bd_t *bis)
 {
 	struct eth_device *dev;
-	dev = (struct eth_device *)malloc(sizeof(*dev));
+	dev = malloc(sizeof(*dev));
 	if (dev == NULL)
 		hang();
 
@@ -89,6 +109,10 @@
 
 	eth_register(dev);
 
+#if defined(CONFIG_MII) || defined(CONFIG_CMD_MII)
+	miiphy_register(dev->name, bfin_miiphy_read, bfin_miiphy_write);
+#endif
+
 	return 0;
 }
 
@@ -119,8 +143,8 @@
 	txbuf[txIdx]->FrmData->NoBytes = length;
 	memcpy(txbuf[txIdx]->FrmData->Dest, (void *)packet, length);
 	txbuf[txIdx]->Dma[0].START_ADDR = (u32) txbuf[txIdx]->FrmData;
-	*pDMA2_NEXT_DESC_PTR = &txbuf[txIdx]->Dma[0];
-	*pDMA2_CONFIG = *(u16 *) (void *)(&txdmacfg);
+	*pDMA2_NEXT_DESC_PTR = txbuf[txIdx]->Dma;
+	*pDMA2_CONFIG = txdmacfg.data;
 	*pEMAC_OPMODE |= TE;
 
 	for (i = 0; (txbuf[txIdx]->StatusWord & TX_COMP) == 0; i++) {
@@ -136,7 +160,7 @@
 	else
 		txIdx++;
  out:
-	DEBUGF("BFIN EMAC send: length = %d\n", length);
+	debug("BFIN EMAC send: length = %d\n", length);
 	return result;
 }
 
@@ -182,156 +206,19 @@
  *
  *************************************************************/
 
-static int bfin_EMAC_init(struct eth_device *dev, bd_t *bd)
-{
-	u32 opmode;
-	int dat;
-	int i;
-	DEBUGF("Eth_init: ......\n");
+/* MDC = SCLK / MDC_freq / 2 - 1 */
+#define MDC_FREQ_TO_DIV(mdc_freq) (get_sclk() / (mdc_freq) / 2 - 1)
 
-	txIdx = 0;
-	rxIdx = 0;
-
-/* Initialize System Register */
-	if (SetupSystemRegs(&dat) < 0)
-		return -1;
-
-/* Initialize EMAC address */
-	bfin_EMAC_setup_addr(bd);
-
-/* Initialize TX and RX buffer */
-	for (i = 0; i < PKTBUFSRX; i++) {
-		rxbuf[i] = SetupRxBuffer(i);
-		if (i > 0) {
-			rxbuf[i - 1]->Dma[1].NEXT_DESC_PTR =
-			    &(rxbuf[i]->Dma[0]);
-			if (i == (PKTBUFSRX - 1))
-				rxbuf[i]->Dma[1].NEXT_DESC_PTR =
-				    &(rxbuf[0]->Dma[0]);
-		}
-	}
-	for (i = 0; i < TX_BUF_CNT; i++) {
-		txbuf[i] = SetupTxBuffer(i);
-		if (i > 0) {
-			txbuf[i - 1]->Dma[1].NEXT_DESC_PTR =
-			    &(txbuf[i]->Dma[0]);
-			if (i == (TX_BUF_CNT - 1))
-				txbuf[i]->Dma[1].NEXT_DESC_PTR =
-				    &(txbuf[0]->Dma[0]);
-		}
-	}
-
-	/* Set RX DMA */
-	*pDMA1_NEXT_DESC_PTR = &rxbuf[0]->Dma[0];
-	*pDMA1_CONFIG = *((u16 *) (void *)&rxbuf[0]->Dma[0].CONFIG);
-
-	/* Wait MII done */
-	PollMdcDone();
-
-	/* We enable only RX here */
-	/* ASTP   : Enable Automatic Pad Stripping
-	   PR     : Promiscuous Mode for test
-	   PSF    : Receive frames with total length less than 64 bytes.
-	   FDMODE : Full Duplex Mode
-	   LB	  : Internal Loopback for test
-	   RE     : Receiver Enable */
-	if (dat == FDMODE)
-		opmode = ASTP | FDMODE | PSF;
-	else
-		opmode = ASTP | PSF;
-	opmode |= RE;
-#ifdef CONFIG_BFIN_MAC_RMII
-	opmode |= TE | RMII;
-#endif
-	/* Turn on the EMAC */
-	*pEMAC_OPMODE = opmode;
-	return 0;
-}
-
-static void bfin_EMAC_halt(struct eth_device *dev)
-{
-	DEBUGF("Eth_halt: ......\n");
-	/* Turn off the EMAC */
-	*pEMAC_OPMODE = 0x00000000;
-	/* Turn off the EMAC RX DMA */
-	*pDMA1_CONFIG = 0x0000;
-	*pDMA2_CONFIG = 0x0000;
-
-}
-
-void bfin_EMAC_setup_addr(bd_t *bd)
-{
-	*pEMAC_ADDRLO =
-		bd->bi_enetaddr[0] |
-		bd->bi_enetaddr[1] << 8 |
-		bd->bi_enetaddr[2] << 16 |
-		bd->bi_enetaddr[3] << 24;
-	*pEMAC_ADDRHI =
-		bd->bi_enetaddr[4] |
-		bd->bi_enetaddr[5] << 8;
-}
-
-static void PollMdcDone(void)
-{
-	/* poll the STABUSY bit */
-	while (*pEMAC_STAADD & STABUSY) ;
-}
-
-static void WrPHYReg(u16 PHYAddr, u16 RegAddr, u16 Data)
-{
-	PollMdcDone();
-
-	*pEMAC_STADAT = Data;
-
-	*pEMAC_STAADD = SET_PHYAD(PHYAddr) | SET_REGAD(RegAddr) |
-	    STAOP | STAIE | STABUSY;
-}
-
-/*********************************************************************************
- *		Read an off-chip register in a PHY through the MDC/MDIO port     *
- *********************************************************************************/
-static u16 RdPHYReg(u16 PHYAddr, u16 RegAddr)
-{
-	u16 Data;
-
-	PollMdcDone();
-
-	*pEMAC_STAADD = SET_PHYAD(PHYAddr) | SET_REGAD(RegAddr) |
-	    STAIE | STABUSY;
-
-	PollMdcDone();
-
-	Data = (u16) * pEMAC_STADAT;
-
-	PHYregs[RegAddr] = Data;	/* save shadow copy */
-
-	return Data;
-}
-
-#if 0 /* dead code ? */
-static void SoftResetPHY(void)
+static int bfin_miiphy_init(struct eth_device *dev, int *opmode)
 {
 	u16 phydat;
-	/* set the reset bit */
-	WrPHYReg(PHYADDR, PHY_MODECTL, PHY_RESET);
-	/* and clear it again */
-	WrPHYReg(PHYADDR, PHY_MODECTL, 0x0000);
-	do {
-		/* poll until reset is complete */
-		phydat = RdPHYReg(PHYADDR, PHY_MODECTL);
-	} while ((phydat & PHY_RESET) != 0);
-}
-#endif
+	size_t count;
 
-static int SetupSystemRegs(int *opmode)
-{
-	u16 sysctl, phydat;
-	int count = 0;
 	/* Enable PHY output */
 	*pVR_CTL |= CLKBUFOE;
-	/* Set all the pins to peripheral mode */
 
-#ifdef CONFIG_BFIN_MAC_RMII
+	/* Set all the pins to peripheral mode */
+#ifdef CONFIG_RMII
 	/* grab RMII pins */
 # if defined(__ADSPBF51x__)
 	*pPORTF_MUX = (*pPORTF_MUX & \
@@ -368,36 +255,35 @@
 # endif
 #endif
 
-	/* MDC  = 2.5 MHz */
-	sysctl = SET_MDCDIV(24);
 	/* Odd word alignment for Receive Frame DMA word */
 	/* Configure checksum support and rcve frame word alignment */
-	sysctl |= RXDWA | RXCKS;
-	*pEMAC_SYSCTL = sysctl;
-	/* auto negotiation on  */
-	/* full duplex */
-	/* 100 Mbps */
-	phydat = PHY_ANEG_EN | PHY_DUPLEX | PHY_SPD_SET;
-	WrPHYReg(PHYADDR, PHY_MODECTL, phydat);
-	do {
-		udelay(1000);
-		phydat = RdPHYReg(PHYADDR, PHY_MODESTAT);
-		if (count > 3000) {
-			printf
-			    ("Link is down, please check your network connection\n");
+	bfin_write_EMAC_SYSCTL(RXDWA | RXCKS | SET_MDCDIV(MDC_FREQ_TO_DIV(CONFIG_PHY_CLOCK_FREQ)));
+
+	/* turn on auto-negotiation and wait for link to come up */
+	bfin_miiphy_write(dev->name, CONFIG_PHY_ADDR, MII_BMCR, BMCR_ANENABLE);
+	count = 0;
+	while (1) {
+		++count;
+		if (bfin_miiphy_read(dev->name, CONFIG_PHY_ADDR, MII_BMSR, &phydat))
+			return -1;
+		if (phydat & BMSR_LSTATUS)
+			break;
+		if (count > 30000) {
+			printf("%s: link down, check cable\n", dev->name);
 			return -1;
 		}
-		count++;
-	} while (!(phydat & 0x0004));
+		udelay(100);
+	}
 
-	phydat = RdPHYReg(PHYADDR, PHY_ANLPAR);
-
-	if ((phydat & 0x0100) || (phydat & 0x0040))
+	/* see what kind of link we have */
+	if (bfin_miiphy_read(dev->name, CONFIG_PHY_ADDR, MII_LPA, &phydat))
+		return -1;
+	if (phydat & LPA_DUPLEX)
 		*opmode = FDMODE;
 	else
 		*opmode = 0;
 
-	*pEMAC_MMC_CTL = RSTC | CROLL;
+	bfin_write_EMAC_MMC_CTL(RSTC | CROLL);
 
 	/* Initialize the TX DMA channel registers */
 	*pDMA2_X_COUNT = 0;
@@ -410,9 +296,95 @@
 	*pDMA1_X_MODIFY = 4;
 	*pDMA1_Y_COUNT = 0;
 	*pDMA1_Y_MODIFY = 0;
+
 	return 0;
 }
 
+static int bfin_EMAC_init(struct eth_device *dev, bd_t *bd)
+{
+	u32 opmode;
+	int dat;
+	int i;
+	debug("Eth_init: ......\n");
+
+	txIdx = 0;
+	rxIdx = 0;
+
+	/* Initialize System Register */
+	if (bfin_miiphy_init(dev, &dat) < 0)
+		return -1;
+
+	/* Initialize EMAC address */
+	bfin_EMAC_setup_addr(bd);
+
+	/* Initialize TX and RX buffer */
+	for (i = 0; i < PKTBUFSRX; i++) {
+		rxbuf[i] = SetupRxBuffer(i);
+		if (i > 0) {
+			rxbuf[i - 1]->Dma[1].NEXT_DESC_PTR = rxbuf[i]->Dma;
+			if (i == (PKTBUFSRX - 1))
+				rxbuf[i]->Dma[1].NEXT_DESC_PTR = rxbuf[0]->Dma;
+		}
+	}
+	for (i = 0; i < TX_BUF_CNT; i++) {
+		txbuf[i] = SetupTxBuffer(i);
+		if (i > 0) {
+			txbuf[i - 1]->Dma[1].NEXT_DESC_PTR = txbuf[i]->Dma;
+			if (i == (TX_BUF_CNT - 1))
+				txbuf[i]->Dma[1].NEXT_DESC_PTR = txbuf[0]->Dma;
+		}
+	}
+
+	/* Set RX DMA */
+	*pDMA1_NEXT_DESC_PTR = rxbuf[0]->Dma;
+	*pDMA1_CONFIG = rxbuf[0]->Dma[0].CONFIG_DATA;
+
+	/* Wait MII done */
+	bfin_miiphy_wait();
+
+	/* We enable only RX here */
+	/* ASTP   : Enable Automatic Pad Stripping
+	   PR     : Promiscuous Mode for test
+	   PSF    : Receive frames with total length less than 64 bytes.
+	   FDMODE : Full Duplex Mode
+	   LB	  : Internal Loopback for test
+	   RE     : Receiver Enable */
+	if (dat == FDMODE)
+		opmode = ASTP | FDMODE | PSF;
+	else
+		opmode = ASTP | PSF;
+	opmode |= RE;
+#ifdef CONFIG_RMII
+	opmode |= TE | RMII;
+#endif
+	/* Turn on the EMAC */
+	*pEMAC_OPMODE = opmode;
+	return 0;
+}
+
+static void bfin_EMAC_halt(struct eth_device *dev)
+{
+	debug("Eth_halt: ......\n");
+	/* Turn off the EMAC */
+	*pEMAC_OPMODE = 0x00000000;
+	/* Turn off the EMAC RX DMA */
+	*pDMA1_CONFIG = 0x0000;
+	*pDMA2_CONFIG = 0x0000;
+
+}
+
+void bfin_EMAC_setup_addr(bd_t *bd)
+{
+	*pEMAC_ADDRLO =
+		bd->bi_enetaddr[0] |
+		bd->bi_enetaddr[1] << 8 |
+		bd->bi_enetaddr[2] << 16 |
+		bd->bi_enetaddr[3] << 24;
+	*pEMAC_ADDRHI =
+		bd->bi_enetaddr[4] |
+		bd->bi_enetaddr[5] << 8;
+}
+
 ADI_ETHER_BUFFER *SetupRxBuffer(int no)
 {
 	ADI_ETHER_FRAME_BUFFER *frmbuf;
@@ -420,10 +392,8 @@
 	int nobytes_buffer = sizeof(ADI_ETHER_BUFFER[2]) / 2;	/* ensure a multi. of 4 */
 	int total_size = nobytes_buffer + RECV_BUFSIZE;
 
-	buf = (ADI_ETHER_BUFFER *) (RXBUF_BASE_ADDR + no * total_size);
-	frmbuf =
-	    (ADI_ETHER_FRAME_BUFFER *) (RXBUF_BASE_ADDR + no * total_size +
-					nobytes_buffer);
+	buf = (void *) (RXBUF_BASE_ADDR + no * total_size);
+	frmbuf = (void *) (RXBUF_BASE_ADDR + no * total_size + nobytes_buffer);
 
 	memset(buf, 0x00, nobytes_buffer);
 	buf->FrmData = frmbuf;
@@ -439,7 +409,7 @@
 	buf->Dma[0].CONFIG.b_FLOW = 7;	/* large desc flow */
 
 	/* set up second desc to point to status word */
-	buf->Dma[1].NEXT_DESC_PTR = &(buf->Dma[0]);
+	buf->Dma[1].NEXT_DESC_PTR = buf->Dma;
 	buf->Dma[1].START_ADDR = (u32) & buf->IPHdrChksum;
 	buf->Dma[1].CONFIG.b_DMA_EN = 1;	/* enabled */
 	buf->Dma[1].CONFIG.b_WNR = 1;	/* Write to memory */
@@ -458,10 +428,8 @@
 	int nobytes_buffer = sizeof(ADI_ETHER_BUFFER[2]) / 2;	/* ensure a multi. of 4 */
 	int total_size = nobytes_buffer + RECV_BUFSIZE;
 
-	buf = (ADI_ETHER_BUFFER *) (TXBUF_BASE_ADDR + no * total_size);
-	frmbuf =
-	    (ADI_ETHER_FRAME_BUFFER *) (TXBUF_BASE_ADDR + no * total_size +
-					nobytes_buffer);
+	buf = (void *) (TXBUF_BASE_ADDR + no * total_size);
+	frmbuf = (void *) (TXBUF_BASE_ADDR + no * total_size + nobytes_buffer);
 
 	memset(buf, 0x00, nobytes_buffer);
 	buf->FrmData = frmbuf;
diff --git a/drivers/net/bfin_mac.h b/drivers/net/bfin_mac.h
index c8a94d0..084f533 100644
--- a/drivers/net/bfin_mac.h
+++ b/drivers/net/bfin_mac.h
@@ -9,29 +9,8 @@
 #ifndef __BFIN_MAC_H__
 #define __BFIN_MAC_H__
 
-#define PHYADDR			0x01
-#define NO_PHY_REGS		0x20
-
-#define DEFAULT_PHY_PHYID1	0x0007
-#define DEFAULT_PHY_PHYID2	0xC0A3
-#define PHY_MODECTL		0x00
-#define PHY_MODESTAT		0x01
-#define PHY_PHYID1		0x02
-#define PHY_PHYID2		0x03
-#define PHY_ANAR		0x04
-#define PHY_ANLPAR		0x05
-#define PHY_ANER		0x06
-
-#define PHY_RESET		0x8000
-#define PHY_ANEG_EN		0x1000
-#define PHY_DUPLEX		0x0100
-#define PHY_SPD_SET		0x2000
-
 #define RECV_BUFSIZE		(0x614)
 
-typedef volatile u32 reg32;
-typedef volatile u16 reg16;
-
 typedef struct ADI_DMA_CONFIG_REG {
 	u16 b_DMA_EN:1;		/* 0	Enabled				*/
 	u16 b_WNR:1;		/* 1	Direction			*/
@@ -56,7 +35,10 @@
 typedef struct dma_descriptor {
 	struct dma_descriptor *NEXT_DESC_PTR;
 	u32 START_ADDR;
-	ADI_DMA_CONFIG_REG CONFIG;
+	union {
+		u16 CONFIG_DATA;
+		ADI_DMA_CONFIG_REG CONFIG;
+	};
 } DMA_DESCRIPTOR;
 /* 10 bytes/struct in 12 bytes */
 
@@ -79,11 +61,6 @@
 static int bfin_EMAC_send(struct eth_device *dev, volatile void *packet, int length);
 static int bfin_EMAC_recv(struct eth_device *dev);
 
-static void PollMdcDone(void);
-static void WrPHYReg(u16 PHYAddr, u16 RegAddr, u16 Data);
-static u16 RdPHYReg(u16 PHYAddr, u16 RegAddr);
-static int SetupSystemRegs(int *opmode);
-
 static void bfin_EMAC_setup_addr(bd_t *bd);
 
 #endif
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 15e0f7a..6e2c121 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -26,6 +26,7 @@
 LIB	:= $(obj)libspi.a
 
 COBJS-$(CONFIG_ATMEL_SPI) += atmel_spi.o
+COBJS-$(CONFIG_BFIN_SPI) += bfin_spi.o
 COBJS-$(CONFIG_MPC8XXX_SPI) += mpc8xxx_spi.o
 COBJS-$(CONFIG_MXC_SPI) += mxc_spi.o
 COBJS-$(CONFIG_SOFT_SPI) += soft_spi.o
diff --git a/drivers/spi/bfin_spi.c b/drivers/spi/bfin_spi.c
new file mode 100644
index 0000000..d22862a
--- /dev/null
+++ b/drivers/spi/bfin_spi.c
@@ -0,0 +1,343 @@
+/*
+ * Driver for Blackfin On-Chip SPI device
+ *
+ * Copyright (c) 2005-2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+/*#define DEBUG*/
+
+#include <common.h>
+#include <malloc.h>
+#include <spi.h>
+
+#include <asm/blackfin.h>
+#include <asm/mach-common/bits/spi.h>
+
+struct bfin_spi_slave {
+	struct spi_slave slave;
+	void *mmr_base;
+	u16 ctl, baud, flg;
+};
+
+#define MAKE_SPI_FUNC(mmr, off) \
+static inline void write_##mmr(struct bfin_spi_slave *bss, u16 val) { bfin_write16(bss->mmr_base + off, val); } \
+static inline u16 read_##mmr(struct bfin_spi_slave *bss) { return bfin_read16(bss->mmr_base + off); }
+MAKE_SPI_FUNC(SPI_CTL,  0x00)
+MAKE_SPI_FUNC(SPI_FLG,  0x04)
+MAKE_SPI_FUNC(SPI_STAT, 0x08)
+MAKE_SPI_FUNC(SPI_TDBR, 0x0c)
+MAKE_SPI_FUNC(SPI_RDBR, 0x10)
+MAKE_SPI_FUNC(SPI_BAUD, 0x14)
+
+#define to_bfin_spi_slave(s) container_of(s, struct bfin_spi_slave, slave)
+
+__attribute__((weak))
+int spi_cs_is_valid(unsigned int bus, unsigned int cs)
+{
+	return (cs >= 1 && cs <= 7);
+}
+
+__attribute__((weak))
+void spi_cs_activate(struct spi_slave *slave)
+{
+	struct bfin_spi_slave *bss = to_bfin_spi_slave(slave);
+	write_SPI_FLG(bss,
+		(read_SPI_FLG(bss) &
+		~((!bss->flg << 8) << slave->cs)) |
+		(1 << slave->cs));
+	debug("%s: SPI_FLG:%x\n", __func__, read_SPI_FLG(bss));
+}
+
+__attribute__((weak))
+void spi_cs_deactivate(struct spi_slave *slave)
+{
+	struct bfin_spi_slave *bss = to_bfin_spi_slave(slave);
+	write_SPI_FLG(bss, read_SPI_FLG(bss) & ~(1 << slave->cs));
+	debug("%s: SPI_FLG:%x\n", __func__, read_SPI_FLG(bss));
+}
+
+void spi_init()
+{
+}
+
+struct spi_slave *spi_setup_slave(unsigned int bus, unsigned int cs,
+		unsigned int max_hz, unsigned int mode)
+{
+	struct bfin_spi_slave *bss;
+	u32 mmr_base;
+	u32 baud;
+
+	if (!spi_cs_is_valid(bus, cs))
+		return NULL;
+
+	switch (bus) {
+#ifdef SPI_CTL
+# define SPI0_CTL SPI_CTL
+#endif
+		case 0: mmr_base = SPI0_CTL; break;
+#ifdef SPI1_CTL
+		case 1: mmr_base = SPI1_CTL; break;
+#endif
+#ifdef SPI2_CTL
+		case 2: mmr_base = SPI2_CTL; break;
+#endif
+		default: return NULL;
+	}
+
+	baud = get_sclk() / (2 * max_hz);
+	if (baud < 2)
+		baud = 2;
+	else if (baud > (u16)-1)
+		baud = -1;
+
+	bss = malloc(sizeof(*bss));
+	if (!bss)
+		return NULL;
+
+	bss->slave.bus = bus;
+	bss->slave.cs = cs;
+	bss->mmr_base = (void *)mmr_base;
+	bss->ctl = SPE | MSTR | TDBR_CORE;
+	if (mode & SPI_CPHA) bss->ctl |= CPHA;
+	if (mode & SPI_CPOL) bss->ctl |= CPOL;
+	if (mode & SPI_LSB_FIRST) bss->ctl |= LSBF;
+	bss->baud = baud;
+	bss->flg = mode & SPI_CS_HIGH ? 1 : 0;
+
+	debug("%s: bus:%i cs:%i mmr:%x ctl:%x baud:%i flg:%i\n", __func__,
+		bus, cs, mmr_base, bss->ctl, baud, bss->flg);
+
+	return &bss->slave;
+}
+
+void spi_free_slave(struct spi_slave *slave)
+{
+	struct bfin_spi_slave *bss = to_bfin_spi_slave(slave);
+	free(bss);
+}
+
+/* Switch the SCK/MISO/MOSI and chip-select pins for slave's bus/cs to SPI */
+static void spi_portmux(struct spi_slave *slave)
+{
+#if defined(__ADSPBF51x__)
+#define SET_MUX(port, mux, func) port##_mux = ((port##_mux & ~PORT_x_MUX_##mux##_MASK) | PORT_x_MUX_##mux##_FUNC_##func)
+	u16 f_mux = bfin_read_PORTF_MUX();
+	u16 f_fer = bfin_read_PORTF_FER();
+	u16 g_mux = bfin_read_PORTG_MUX();
+	u16 g_fer = bfin_read_PORTG_FER();
+	u16 h_mux = bfin_read_PORTH_MUX();
+	u16 h_fer = bfin_read_PORTH_FER();
+	switch (slave->bus) {
+	case 0:
+		/* set SCK/MISO/MOSI */
+		SET_MUX(g, 7, 1);
+		g_fer |= PG12 | PG13 | PG14;
+		switch (slave->cs) {
+			case 1: SET_MUX(f, 2, 1); f_fer |= PF7;  break;
+			case 2: /* see G above */ g_fer |= PG15; break;
+			case 3: SET_MUX(h, 1, 3); h_fer |= PH4;  break;
+			case 5: SET_MUX(g, 1, 3); g_fer |= PG3;  break;
+			case 4: case 6: case 7: /* no muxing */  break;
+		}
+		break;	/* bus 0 is done; do not also claim the SPI1 pins */
+	case 1:
+		/* set SCK/MISO/MOSI */
+		SET_MUX(h, 0, 2);
+		h_fer |= PH1 | PH2 | PH3;
+		switch (slave->cs) {
+			case 1: SET_MUX(h, 2, 3); h_fer |= PH6;  break;
+			case 2: SET_MUX(f, 0, 3); f_fer |= PF0;  break;
+			case 3: SET_MUX(g, 0, 3); g_fer |= PG0;  break;
+			case 4: SET_MUX(f, 3, 3); f_fer |= PF8;  break;
+			case 5: SET_MUX(g, 6, 3); g_fer |= PG11; break;
+			case 6: case 7: /* no muxing */          break;
+		}
+		break;
+	}
+	bfin_write_PORTF_MUX(f_mux);
+	bfin_write_PORTF_FER(f_fer);
+	bfin_write_PORTG_MUX(g_mux);
+	bfin_write_PORTG_FER(g_fer);
+	bfin_write_PORTH_MUX(h_mux);
+	bfin_write_PORTH_FER(h_fer);
+#elif defined(__ADSPBF52x__)
+#define SET_MUX(port, mux, func) port##_mux = ((port##_mux & ~PORT_x_MUX_##mux##_MASK) | PORT_x_MUX_##mux##_FUNC_##func)
+	u16 f_mux = bfin_read_PORTF_MUX();
+	u16 f_fer = bfin_read_PORTF_FER();
+	u16 g_mux = bfin_read_PORTG_MUX();
+	u16 g_fer = bfin_read_PORTG_FER();
+	u16 h_mux = bfin_read_PORTH_MUX();
+	u16 h_fer = bfin_read_PORTH_FER();
+	/* set SCK/MISO/MOSI */
+	SET_MUX(g, 0, 3);
+	g_fer |= PG2 | PG3 | PG4;
+	switch (slave->cs) {
+		case 1: /* see G above */ g_fer |= PG1;  break;
+		case 2: SET_MUX(f, 4, 3); f_fer |= PF12; break;
+		case 3: SET_MUX(f, 4, 3); f_fer |= PF13; break;
+		case 4: SET_MUX(h, 1, 1); h_fer |= PH8;  break;
+		case 5: SET_MUX(h, 2, 1); h_fer |= PH9;  break;
+		case 6: SET_MUX(f, 1, 3); f_fer |= PF9;  break;
+		case 7: SET_MUX(f, 2, 3); f_fer |= PF10; break;
+	}
+	bfin_write_PORTF_MUX(f_mux);
+	bfin_write_PORTF_FER(f_fer);
+	bfin_write_PORTG_MUX(g_mux);
+	bfin_write_PORTG_FER(g_fer);
+	bfin_write_PORTH_MUX(h_mux);
+	bfin_write_PORTH_FER(h_fer);
+#elif defined(__ADSPBF534__) || defined(__ADSPBF536__) || defined(__ADSPBF537__)
+	u16 mux = bfin_read_PORT_MUX();
+	u16 f_fer = bfin_read_PORTF_FER();
+	u16 j_fer = bfin_read_PORTJ_FER();
+	/* set SCK/MISO/MOSI */
+	f_fer |= PF11 | PF12 | PF13;
+	switch (slave->cs) {
+		case 1: f_fer |= PF10; break;
+		case 2: mux |= PJSE; j_fer |= PJ11; break;
+		case 3: mux |= PJSE; j_fer |= PJ10; break;
+		case 4: mux |= PFS4E; f_fer |= PF6; break;
+		case 5: mux |= PFS5E; f_fer |= PF5; break;
+		case 6: mux |= PFS6E; f_fer |= PF4; break;
+		case 7: mux |= PJCE_SPI; j_fer |= PJ5; break;
+	}
+	bfin_write_PORT_MUX(mux);
+	bfin_write_PORTF_FER(f_fer);
+	bfin_write_PORTJ_FER(j_fer);
+#elif defined(__ADSPBF54x__)
+#define DO_MUX(port, pin) \
+	mux = ((mux & ~PORT_x_MUX_##pin##_MASK) | PORT_x_MUX_##pin##_FUNC_1); \
+	fer |= P##port##pin;
+	u32 mux;
+	u16 fer;
+	switch (slave->bus) {
+	case 0:
+		mux = bfin_read_PORTE_MUX();
+		fer = bfin_read_PORTE_FER();
+		/* set SCK/MISO/MOSI */
+		DO_MUX(E, 0);
+		DO_MUX(E, 1);
+		DO_MUX(E, 2);
+		switch (slave->cs) {
+			case 1: DO_MUX(E, 4); break;
+			case 2: DO_MUX(E, 5); break;
+			case 3: DO_MUX(E, 6); break;
+		}
+		bfin_write_PORTE_MUX(mux);
+		bfin_write_PORTE_FER(fer);
+		break;
+	case 1:
+		mux = bfin_read_PORTG_MUX();
+		fer = bfin_read_PORTG_FER();
+		/* set SCK/MISO/MOSI */
+		DO_MUX(G, 8);
+		DO_MUX(G, 9);
+		DO_MUX(G, 10);
+		switch (slave->cs) {
+			case 1: DO_MUX(G, 5); break;
+			case 2: DO_MUX(G, 6); break;
+			case 3: DO_MUX(G, 7); break;
+		}
+		bfin_write_PORTG_MUX(mux);
+		bfin_write_PORTG_FER(fer);
+		break;
+	case 2:
+		mux = bfin_read_PORTB_MUX();
+		fer = bfin_read_PORTB_FER();
+		/* set SCK/MISO/MOSI */
+		DO_MUX(B, 12);
+		DO_MUX(B, 13);
+		DO_MUX(B, 14);
+		switch (slave->cs) {
+			case 1: DO_MUX(B, 9);  break;
+			case 2: DO_MUX(B, 10); break;
+			case 3: DO_MUX(B, 11); break;
+		}
+		bfin_write_PORTB_MUX(mux);
+		bfin_write_PORTB_FER(fer);
+		break;
+	}
+#endif
+}
+
+int spi_claim_bus(struct spi_slave *slave)
+{
+	struct bfin_spi_slave *bss = to_bfin_spi_slave(slave);
+
+	debug("%s: bus:%i cs:%i\n", __func__, slave->bus, slave->cs);
+
+	spi_portmux(slave);
+	write_SPI_CTL(bss, bss->ctl);
+	write_SPI_BAUD(bss, bss->baud);
+	SSYNC();
+
+	return 0;
+}
+
+void spi_release_bus(struct spi_slave *slave)
+{
+	struct bfin_spi_slave *bss = to_bfin_spi_slave(slave);
+	debug("%s: bus:%i cs:%i\n", __func__, slave->bus, slave->cs);
+	write_SPI_CTL(bss, 0);
+	SSYNC();
+}
+
+int spi_xfer(struct spi_slave *slave, unsigned int bitlen, const void *dout,
+		void *din, unsigned long flags)
+{
+	struct bfin_spi_slave *bss = to_bfin_spi_slave(slave);
+	const u8 *tx = dout;
+	u8 *rx = din;
+	uint bytes = bitlen / 8;
+	int ret = 0;
+
+	debug("%s: bus:%i cs:%i bitlen:%i bytes:%i flags:%lx\n", __func__,
+		slave->bus, slave->cs, bitlen, bytes, flags);
+
+	if (bitlen == 0)
+		goto done;
+
+	/* we can only do 8 bit transfers */
+	if (bitlen % 8) {
+		flags |= SPI_XFER_END;
+		goto done;
+	}
+
+	if (flags & SPI_XFER_BEGIN)
+		spi_cs_activate(slave);
+
+	/* todo: take advantage of hardware fifos and setup RX dma */
+	while (bytes--) {
+		u8 value = (tx ? *tx++ : 0);
+		debug("%s: tx:%x ", __func__, value);
+		write_SPI_TDBR(bss, value);
+		SSYNC();
+		while ((read_SPI_STAT(bss) & TXS))
+			if (ctrlc()) {
+				ret = -1;
+				goto done;
+			}
+		while (!(read_SPI_STAT(bss) & SPIF))
+			if (ctrlc()) {
+				ret = -1;
+				goto done;
+			}
+		while (!(read_SPI_STAT(bss) & RXS))
+			if (ctrlc()) {
+				ret = -1;
+				goto done;
+			}
+		value = read_SPI_RDBR(bss);
+		if (rx)
+			*rx++ = value;
+		debug("rx:%x\n", value);
+	}
+
+ done:
+	if (flags & SPI_XFER_END)
+		spi_cs_deactivate(slave);
+
+	return ret;
+}
diff --git a/include/asm-blackfin/blackfin-config-post.h b/include/asm-blackfin/blackfin-config-post.h
index 21abd72..fea4737 100644
--- a/include/asm-blackfin/blackfin-config-post.h
+++ b/include/asm-blackfin/blackfin-config-post.h
@@ -67,6 +67,11 @@
 # define CONFIG_LINUX_CMDLINE_SIZE L1_SRAM_SCRATCH_SIZE
 #endif
 
+/* Set default SPI flash CS to the one we boot from */
+#if defined(CONFIG_ENV_IS_IN_SPI_FLASH) && !defined(CONFIG_ENV_SPI_CS)
+# define CONFIG_ENV_SPI_CS BFIN_BOOT_SPI_SSEL
+#endif
+
 /* Default/common Blackfin memory layout */
 #ifndef CONFIG_SYS_SDRAM_BASE
 # define CONFIG_SYS_SDRAM_BASE 0
diff --git a/include/asm-blackfin/blackfin-config-pre.h b/include/asm-blackfin/blackfin-config-pre.h
index 714352b..a1fae5c 100644
--- a/include/asm-blackfin/blackfin-config-pre.h
+++ b/include/asm-blackfin/blackfin-config-pre.h
@@ -38,4 +38,26 @@
 #define BFIN_BOOT_8HOST_DMA   12      /* boot ldr from 8-bit host dma */
 #define BFIN_BOOT_NAND        13      /* boot ldr from nand flash */
 
+#ifndef __ASSEMBLY__
+static inline const char *get_bfin_boot_mode(int bfin_boot)
+{
+	switch (bfin_boot) {	/* translate a BFIN_BOOT_* value into a printable name */
+	case BFIN_BOOT_BYPASS:     return "bypass";
+	case BFIN_BOOT_PARA:       return "parallel flash";
+	case BFIN_BOOT_SPI_MASTER: return "spi flash";
+	case BFIN_BOOT_SPI_SLAVE:  return "spi slave";
+	case BFIN_BOOT_TWI_MASTER: return "i2c flash";
+	case BFIN_BOOT_TWI_SLAVE:  return "i2c slave";
+	case BFIN_BOOT_UART:       return "uart";
+	case BFIN_BOOT_IDLE:       return "idle";
+	case BFIN_BOOT_FIFO:       return "fifo";
+	case BFIN_BOOT_MEM:        return "memory";
+	case BFIN_BOOT_16HOST_DMA: return "16bit dma";
+	case BFIN_BOOT_8HOST_DMA:  return "8bit dma";
+	case BFIN_BOOT_NAND:       return "nand flash";
+	default:                   return "INVALID";	/* any value not listed above */
+	}
+}
+#endif
+
 #endif
diff --git a/include/asm-blackfin/blackfin_local.h b/include/asm-blackfin/blackfin_local.h
index c9ee91a..e17d8a2 100644
--- a/include/asm-blackfin/blackfin_local.h
+++ b/include/asm-blackfin/blackfin_local.h
@@ -43,6 +43,9 @@
 #define SCLK_TO_MSEC(sclk) ((MSEC_PER_SEC * ((sclk) / USEC_PER_MSEC)) / (BFIN_SCLK / USEC_PER_MSEC))
 #define MSEC_TO_SCLK(msec) ((((BFIN_SCLK / USEC_PER_MSEC) * (msec)) / MSEC_PER_SEC) * USEC_PER_MSEC)
 
+#define L1_CACHE_SHIFT 5
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
 #include <asm/linkage.h>
 
 #ifndef __ASSEMBLY__
@@ -52,25 +55,22 @@
 
 # include <linux/types.h>
 
+extern u_long get_vco(void);
+extern u_long get_cclk(void);
 extern u_long get_sclk(void);
 
 # define bfin_revid() (*pCHIPID >> 28)
 
 extern void blackfin_icache_flush_range(const void *, const void *);
 extern void blackfin_dcache_flush_range(const void *, const void *);
+extern void blackfin_icache_dcache_flush_range(const void *, const void *);
 extern void blackfin_dcache_flush_invalidate_range(const void *, const void *);
 
-/* Use DMA to move data from on chip to external memory.  While this is
- * required for only L1 instruction (it is not directly readable by the
- * core via data loads), it isn't a huge performance issue for other
- * regions (it's probably even faster than core load/stores).  However,
- * the DMA engine does not have access to the L1 scratchpad, and we
- * cannot use DMA inside of the MMR space.
+/* Use DMA to move data from on chip to external memory.  The L1 instruction
+ * regions can only be accessed via DMA, so if the address in question is in
+ * that region, make sure we attempt to DMA indirectly.
  */
-# define addr_bfin_on_chip_mem(addr) \
-	(((unsigned long)(addr) >= 0xef000000 && (unsigned long)addr < SYSMMR_BASE) && \
-	 !((unsigned long)(addr) >= L1_SRAM_SCRATCH && \
-	   (unsigned long)(addr) < L1_SRAM_SCRATCH_END))
+# define addr_bfin_on_chip_mem(addr) (((unsigned long)(addr) & 0xFFF00000) == 0xFFA00000)
 
 # include <asm/system.h>
 
diff --git a/include/asm-blackfin/mach-bf548/ports.h b/include/asm-blackfin/mach-bf548/ports.h
index c828516..5f0e45e 100644
--- a/include/asm-blackfin/mach-bf548/ports.h
+++ b/include/asm-blackfin/mach-bf548/ports.h
@@ -236,7 +236,25 @@
 #define PH14			0x4000
 #define PH15			0x8000
 
-/* Port J Masks */
+/* Port I Masks */
+#define PI0			0x0001
+#define PI1			0x0002
+#define PI2			0x0004
+#define PI3			0x0008
+#define PI4			0x0010
+#define PI5			0x0020
+#define PI6			0x0040
+#define PI7			0x0080
+#define PI8			0x0100
+#define PI9			0x0200
+#define PI10			0x0400
+#define PI11			0x0800
+#define PI12			0x1000
+#define PI13			0x2000
+#define PI14			0x4000
+#define PI15			0x8000
+
+/* Port J Masks */
 #define PJ0			0x0001
 #define PJ1			0x0002
 #define PJ2			0x0004
diff --git a/include/asm-blackfin/mach-common/bits/ebiu.h b/include/asm-blackfin/mach-common/bits/ebiu.h
index af456fb..7c0c569 100644
--- a/include/asm-blackfin/mach-common/bits/ebiu.h
+++ b/include/asm-blackfin/mach-common/bits/ebiu.h
@@ -357,8 +357,8 @@
 #define EBSZ_32		0x0002		/* SDRAM External Bank Size = 32MB */
 #define EBSZ_64		0x0004		/* SDRAM External Bank Size = 64MB */
 #define EBSZ_128	0x0006		/* SDRAM External Bank Size = 128MB */
-#define EBSZ_256	0x0007		/* SDRAM External Bank Size = 256MB */
-#define EBSZ_512	0x0008		/* SDRAM External Bank Size = 512MB */
+#define EBSZ_256	0x0008		/* SDRAM External Bank Size = 256MB */
+#define EBSZ_512	0x000A		/* SDRAM External Bank Size = 512MB */
 #define EBCAW_8		0x0000		/* SDRAM External Bank Column Address Width = 8 Bits */
 #define EBCAW_9		0x0010		/* SDRAM External Bank Column Address Width = 9 Bits */
 #define EBCAW_10	0x0020		/* SDRAM External Bank Column Address Width = 10 Bits */
diff --git a/include/asm-blackfin/mach-common/bits/pata.h b/include/asm-blackfin/mach-common/bits/pata.h
new file mode 100644
index 0000000..9b61824
--- /dev/null
+++ b/include/asm-blackfin/mach-common/bits/pata.h
@@ -0,0 +1,220 @@
+/*
+ * ATAPI Masks
+ */
+
+#ifndef __BFIN_PERIPHERAL_PATA__
+#define __BFIN_PERIPHERAL_PATA__
+
+/* Bit masks for ATAPI_CONTROL */
+#define                 PIO_START  0x1        /* Start PIO/Reg Op */
+#define               MULTI_START  0x2        /* Start Multi-DMA Op */
+#define               ULTRA_START  0x4        /* Start Ultra-DMA Op */
+#define                  XFER_DIR  0x8        /* Transfer Direction */
+#define                  IORDY_EN  0x10       /* IORDY Enable */
+#define                FIFO_FLUSH  0x20       /* Flush FIFOs */
+#define                  SOFT_RST  0x40       /* Soft Reset */
+#define                   DEV_RST  0x80       /* Device Reset */
+#define                TFRCNT_RST  0x100      /* Trans Count Reset */
+#define               END_ON_TERM  0x200      /* End/Terminate Select */
+#define               PIO_USE_DMA  0x400      /* PIO-DMA Enable */
+#define          UDMAIN_FIFO_THRS  0xf000     /* Ultra DMA-IN FIFO Threshold */
+
+/* Bit masks for ATAPI_STATUS */
+#define               PIO_XFER_ON  0x1        /* PIO transfer in progress */
+#define             MULTI_XFER_ON  0x2        /* Multi-word DMA transfer in progress */
+#define             ULTRA_XFER_ON  0x4        /* Ultra DMA transfer in progress */
+#define               ULTRA_IN_FL  0xf0       /* Ultra DMA Input FIFO Level */
+
+/* Bit masks for ATAPI_DEV_ADDR */
+#define                  DEV_ADDR  0x1f       /* Device Address */
+
+/* Bit masks for ATAPI_INT_MASK */
+#define        ATAPI_DEV_INT_MASK  0x1        /* Device interrupt mask */
+#define             PIO_DONE_MASK  0x2        /* PIO transfer done interrupt mask */
+#define           MULTI_DONE_MASK  0x4        /* Multi-DMA transfer done interrupt mask */
+#define          UDMAIN_DONE_MASK  0x8        /* Ultra-DMA in transfer done interrupt mask */
+#define         UDMAOUT_DONE_MASK  0x10       /* Ultra-DMA out transfer done interrupt mask */
+#define       HOST_TERM_XFER_MASK  0x20       /* Host terminate current transfer interrupt mask */
+#define           MULTI_TERM_MASK  0x40       /* Device terminate Multi-DMA transfer interrupt mask */
+#define          UDMAIN_TERM_MASK  0x80       /* Device terminate Ultra-DMA-in transfer interrupt mask */
+#define         UDMAOUT_TERM_MASK  0x100      /* Device terminate Ultra-DMA-out transfer interrupt mask */
+
+/* Bit masks for ATAPI_INT_STATUS */
+#define             ATAPI_DEV_INT  0x1        /* Device interrupt status */
+#define              PIO_DONE_INT  0x2        /* PIO transfer done interrupt status */
+#define            MULTI_DONE_INT  0x4        /* Multi-DMA transfer done interrupt status */
+#define           UDMAIN_DONE_INT  0x8        /* Ultra-DMA in transfer done interrupt status */
+#define          UDMAOUT_DONE_INT  0x10       /* Ultra-DMA out transfer done interrupt status */
+#define        HOST_TERM_XFER_INT  0x20       /* Host terminate current transfer interrupt status */
+#define            MULTI_TERM_INT  0x40       /* Device terminate Multi-DMA transfer interrupt status */
+#define           UDMAIN_TERM_INT  0x80       /* Device terminate Ultra-DMA-in transfer interrupt status */
+#define          UDMAOUT_TERM_INT  0x100      /* Device terminate Ultra-DMA-out transfer interrupt status */
+
+/* Bit masks for ATAPI_LINE_STATUS */
+#define                ATAPI_INTR  0x1        /* Device interrupt to host line status */
+#define                ATAPI_DASP  0x2        /* Device dasp to host line status */
+#define                ATAPI_CS0N  0x4        /* ATAPI chip select 0 line status */
+#define                ATAPI_CS1N  0x8        /* ATAPI chip select 1 line status */
+#define                ATAPI_ADDR  0x70       /* ATAPI address line status */
+#define              ATAPI_DMAREQ  0x80       /* ATAPI DMA request line status */
+#define             ATAPI_DMAACKN  0x100      /* ATAPI DMA acknowledge line status */
+#define               ATAPI_DIOWN  0x200      /* ATAPI write line status */
+#define               ATAPI_DIORN  0x400      /* ATAPI read line status */
+#define               ATAPI_IORDY  0x800      /* ATAPI IORDY line status */
+
+/* Bit masks for ATAPI_SM_STATE */
+#define                PIO_CSTATE  0xf        /* PIO mode state machine current state */
+#define                DMA_CSTATE  0xf0       /* DMA mode state machine current state */
+#define             UDMAIN_CSTATE  0xf00      /* Ultra DMA-In mode state machine current state */
+#define            UDMAOUT_CSTATE  0xf000     /* Ultra DMA-Out mode state machine current state */
+
+/* Bit masks for ATAPI_TERMINATE */
+#define           ATAPI_HOST_TERM  0x1        /* Host termination */
+
+/* Bit masks for ATAPI_REG_TIM_0 */
+#define                    T2_REG  0xff       /* End of cycle time for register access transfers */
+#define                  TEOC_REG  0xff00     /* Selects DIOR/DIOW pulsewidth */
+
+/* Bit masks for ATAPI_PIO_TIM_0 */
+#define                    T1_REG  0xf        /* Time from address valid to DIOR/DIOW */
+#define                T2_REG_PIO  0xff0      /* DIOR/DIOW pulsewidth */
+#define                    T4_REG  0xf000     /* DIOW data hold */
+
+/* Bit masks for ATAPI_PIO_TIM_1 */
+#define              TEOC_REG_PIO  0xff       /* End of cycle time for PIO access transfers. */
+
+/* Bit masks for ATAPI_MULTI_TIM_0 */
+#define                        TD  0xff       /* DIOR/DIOW asserted pulsewidth */
+#define                        TM  0xff00     /* Time from address valid to DIOR/DIOW */
+
+/* Bit masks for ATAPI_MULTI_TIM_1 */
+#define                       TKW  0xff       /* Selects DIOW negated pulsewidth */
+#define                       TKR  0xff00     /* Selects DIOR negated pulsewidth */
+
+/* Bit masks for ATAPI_MULTI_TIM_2 */
+#define                        TH  0xff       /* Selects DIOW data hold */
+#define                      TEOC  0xff00     /* Selects end of cycle for DMA */
+
+/* Bit masks for ATAPI_ULTRA_TIM_0 */
+#define                      TACK  0xff       /* Selects setup and hold times for TACK */
+#define                      TENV  0xff00     /* Selects envelope time */
+
+/* Bit masks for ATAPI_ULTRA_TIM_1 */
+#define                      TDVS  0xff       /* Selects data valid setup time */
+#define                 TCYC_TDVS  0xff00     /* Selects cycle time - TDVS time */
+
+/* Bit masks for ATAPI_ULTRA_TIM_2 */
+#define                       TSS  0xff       /* Selects time from STROBE edge to negation of DMARQ or assertion of STOP */
+#define                      TMLI  0xff00     /* Selects interlock time */
+
+/* Bit masks for ATAPI_ULTRA_TIM_3 */
+#define                      TZAH  0xff       /* Selects minimum delay required for output */
+#define               READY_PAUSE  0xff00     /* Selects ready to pause */
+
+/* Bit masks for ATAPI_CONTROL */
+#define                 PIO_START  0x1        /* Start PIO/Reg Op */
+#define               MULTI_START  0x2        /* Start Multi-DMA Op */
+#define               ULTRA_START  0x4        /* Start Ultra-DMA Op */
+#define                  XFER_DIR  0x8        /* Transfer Direction */
+#define                  IORDY_EN  0x10       /* IORDY Enable */
+#define                FIFO_FLUSH  0x20       /* Flush FIFOs */
+#define                  SOFT_RST  0x40       /* Soft Reset */
+#define                   DEV_RST  0x80       /* Device Reset */
+#define                TFRCNT_RST  0x100      /* Trans Count Reset */
+#define               END_ON_TERM  0x200      /* End/Terminate Select */
+#define               PIO_USE_DMA  0x400      /* PIO-DMA Enable */
+#define          UDMAIN_FIFO_THRS  0xf000     /* Ultra DMA-IN FIFO Threshold */
+
+/* Bit masks for ATAPI_STATUS */
+#define               PIO_XFER_ON  0x1        /* PIO transfer in progress */
+#define             MULTI_XFER_ON  0x2        /* Multi-word DMA transfer in progress */
+#define             ULTRA_XFER_ON  0x4        /* Ultra DMA transfer in progress */
+#define               ULTRA_IN_FL  0xf0       /* Ultra DMA Input FIFO Level */
+
+/* Bit masks for ATAPI_DEV_ADDR */
+#define                  DEV_ADDR  0x1f       /* Device Address */
+
+/* Bit masks for ATAPI_INT_MASK */
+#define        ATAPI_DEV_INT_MASK  0x1        /* Device interrupt mask */
+#define             PIO_DONE_MASK  0x2        /* PIO transfer done interrupt mask */
+#define           MULTI_DONE_MASK  0x4        /* Multi-DMA transfer done interrupt mask */
+#define          UDMAIN_DONE_MASK  0x8        /* Ultra-DMA in transfer done interrupt mask */
+#define         UDMAOUT_DONE_MASK  0x10       /* Ultra-DMA out transfer done interrupt mask */
+#define       HOST_TERM_XFER_MASK  0x20       /* Host terminate current transfer interrupt mask */
+#define           MULTI_TERM_MASK  0x40       /* Device terminate Multi-DMA transfer interrupt mask */
+#define          UDMAIN_TERM_MASK  0x80       /* Device terminate Ultra-DMA-in transfer interrupt mask */
+#define         UDMAOUT_TERM_MASK  0x100      /* Device terminate Ultra-DMA-out transfer interrupt mask */
+
+/* Bit masks for ATAPI_INT_STATUS */
+#define             ATAPI_DEV_INT  0x1        /* Device interrupt status */
+#define              PIO_DONE_INT  0x2        /* PIO transfer done interrupt status */
+#define            MULTI_DONE_INT  0x4        /* Multi-DMA transfer done interrupt status */
+#define           UDMAIN_DONE_INT  0x8        /* Ultra-DMA in transfer done interrupt status */
+#define          UDMAOUT_DONE_INT  0x10       /* Ultra-DMA out transfer done interrupt status */
+#define        HOST_TERM_XFER_INT  0x20       /* Host terminate current transfer interrupt status */
+#define            MULTI_TERM_INT  0x40       /* Device terminate Multi-DMA transfer interrupt status */
+#define           UDMAIN_TERM_INT  0x80       /* Device terminate Ultra-DMA-in transfer interrupt status */
+#define          UDMAOUT_TERM_INT  0x100      /* Device terminate Ultra-DMA-out transfer interrupt status */
+
+/* Bit masks for ATAPI_LINE_STATUS */
+#define                ATAPI_INTR  0x1        /* Device interrupt to host line status */
+#define                ATAPI_DASP  0x2        /* Device dasp to host line status */
+#define                ATAPI_CS0N  0x4        /* ATAPI chip select 0 line status */
+#define                ATAPI_CS1N  0x8        /* ATAPI chip select 1 line status */
+#define                ATAPI_ADDR  0x70       /* ATAPI address line status */
+#define              ATAPI_DMAREQ  0x80       /* ATAPI DMA request line status */
+#define             ATAPI_DMAACKN  0x100      /* ATAPI DMA acknowledge line status */
+#define               ATAPI_DIOWN  0x200      /* ATAPI write line status */
+#define               ATAPI_DIORN  0x400      /* ATAPI read line status */
+#define               ATAPI_IORDY  0x800      /* ATAPI IORDY line status */
+
+/* Bit masks for ATAPI_SM_STATE */
+#define                PIO_CSTATE  0xf        /* PIO mode state machine current state */
+#define                DMA_CSTATE  0xf0       /* DMA mode state machine current state */
+#define             UDMAIN_CSTATE  0xf00      /* Ultra DMA-In mode state machine current state */
+#define            UDMAOUT_CSTATE  0xf000     /* Ultra DMA-Out mode state machine current state */
+
+/* Bit masks for ATAPI_TERMINATE */
+#define           ATAPI_HOST_TERM  0x1        /* Host termination */
+
+/* Bit masks for ATAPI_REG_TIM_0 */
+#define                    T2_REG  0xff       /* End of cycle time for register access transfers */
+#define                  TEOC_REG  0xff00     /* Selects DIOR/DIOW pulsewidth */
+
+/* Bit masks for ATAPI_PIO_TIM_0 */
+#define                    T1_REG  0xf        /* Time from address valid to DIOR/DIOW */
+#define                T2_REG_PIO  0xff0      /* DIOR/DIOW pulsewidth */
+#define                    T4_REG  0xf000     /* DIOW data hold */
+
+/* Bit masks for ATAPI_PIO_TIM_1 */
+#define              TEOC_REG_PIO  0xff       /* End of cycle time for PIO access transfers. */
+
+/* Bit masks for ATAPI_MULTI_TIM_0 */
+#define                        TD  0xff       /* DIOR/DIOW asserted pulsewidth */
+#define                        TM  0xff00     /* Time from address valid to DIOR/DIOW */
+
+/* Bit masks for ATAPI_MULTI_TIM_1 */
+#define                       TKW  0xff       /* Selects DIOW negated pulsewidth */
+#define                       TKR  0xff00     /* Selects DIOR negated pulsewidth */
+
+/* Bit masks for ATAPI_MULTI_TIM_2 */
+#define                        TH  0xff       /* Selects DIOW data hold */
+#define                      TEOC  0xff00     /* Selects end of cycle for DMA */
+
+/* Bit masks for ATAPI_ULTRA_TIM_0 */
+#define                      TACK  0xff       /* Selects setup and hold times for TACK */
+#define                      TENV  0xff00     /* Selects envelope time */
+
+/* Bit masks for ATAPI_ULTRA_TIM_1 */
+#define                      TDVS  0xff       /* Selects data valid setup time */
+#define                 TCYC_TDVS  0xff00     /* Selects cycle time - TDVS time */
+
+/* Bit masks for ATAPI_ULTRA_TIM_2 */
+#define                       TSS  0xff       /* Selects time from STROBE edge to negation of DMARQ or assertion of STOP */
+#define                      TMLI  0xff00     /* Selects interlock time */
+
+/* Bit masks for ATAPI_ULTRA_TIM_3 */
+#define                      TZAH  0xff       /* Selects minimum delay required for output */
+#define               READY_PAUSE  0xff00     /* Selects ready to pause */
+
+#endif /* __BFIN_PERIPHERAL_PATA__ */
diff --git a/include/asm-blackfin/mach-common/bits/sdh.h b/include/asm-blackfin/mach-common/bits/sdh.h
new file mode 100644
index 0000000..8c5dd33
--- /dev/null
+++ b/include/asm-blackfin/mach-common/bits/sdh.h
@@ -0,0 +1,122 @@
+/*
+ * SDH Masks
+ */
+
+#ifndef __BFIN_PERIPHERAL_SDH__
+#define __BFIN_PERIPHERAL_SDH__
+
+/* Bit masks for SDH_COMMAND */
+#define                   CMD_IDX  0x3f       /* Command Index */
+#define                   CMD_RSP  0x40       /* Response */
+#define                 CMD_L_RSP  0x80       /* Long Response */
+#define                 CMD_INT_E  0x100      /* Command Interrupt */
+#define                CMD_PEND_E  0x200      /* Command Pending */
+#define                     CMD_E  0x400      /* Command Enable */
+
+/* Bit masks for SDH_PWR_CTL */
+#define                    PWR_ON  0x3        /* Power On */
+#define                 SD_CMD_OD  0x40       /* Open Drain Output */
+#define                   ROD_CTL  0x80       /* Rod Control */
+
+/* Bit masks for SDH_CLK_CTL */
+#define                    CLKDIV  0xff       /* MC_CLK Divisor */
+#define                     CLK_E  0x100      /* MC_CLK Bus Clock Enable */
+#define                  PWR_SV_E  0x200      /* Power Save Enable */
+#define             CLKDIV_BYPASS  0x400      /* Bypass Divisor */
+#define                  WIDE_BUS  0x800      /* Wide Bus Mode Enable */
+
+/* Bit masks for SDH_RESP_CMD */
+#define                  RESP_CMD  0x3f       /* Response Command */
+
+/* Bit masks for SDH_DATA_CTL */
+#define                     DTX_E  0x1        /* Data Transfer Enable */
+#define                   DTX_DIR  0x2        /* Data Transfer Direction */
+#define                  DTX_MODE  0x4        /* Data Transfer Mode */
+#define                 DTX_DMA_E  0x8        /* Data Transfer DMA Enable */
+#define              DTX_BLK_LGTH  0xf0       /* Data Transfer Block Length */
+
+/* Bit masks for SDH_STATUS */
+#define              CMD_CRC_FAIL  0x1        /* CMD CRC Fail */
+#define              DAT_CRC_FAIL  0x2        /* Data CRC Fail */
+#define              CMD_TIME_OUT  0x4        /* CMD Time Out */
+#define              DAT_TIME_OUT  0x8        /* Data Time Out */
+#define               TX_UNDERRUN  0x10       /* Transmit Underrun */
+#define                RX_OVERRUN  0x20       /* Receive Overrun */
+#define              CMD_RESP_END  0x40       /* CMD Response End */
+#define                  CMD_SENT  0x80       /* CMD Sent */
+#define                   DAT_END  0x100      /* Data End */
+#define             START_BIT_ERR  0x200      /* Start Bit Error */
+#define               DAT_BLK_END  0x400      /* Data Block End */
+#define                   CMD_ACT  0x800      /* CMD Active */
+#define                    TX_ACT  0x1000     /* Transmit Active */
+#define                    RX_ACT  0x2000     /* Receive Active */
+#define              TX_FIFO_STAT  0x4000     /* Transmit FIFO Status */
+#define              RX_FIFO_STAT  0x8000     /* Receive FIFO Status */
+#define              TX_FIFO_FULL  0x10000    /* Transmit FIFO Full */
+#define              RX_FIFO_FULL  0x20000    /* Receive FIFO Full */
+#define              TX_FIFO_ZERO  0x40000    /* Transmit FIFO Empty */
+#define               RX_DAT_ZERO  0x80000    /* Receive FIFO Empty */
+#define                TX_DAT_RDY  0x100000   /* Transmit Data Available */
+#define               RX_FIFO_RDY  0x200000   /* Receive Data Available */
+
+/* Bit masks for SDH_STATUS_CLR */
+#define         CMD_CRC_FAIL_STAT  0x1        /* CMD CRC Fail Status */
+#define         DAT_CRC_FAIL_STAT  0x2        /* Data CRC Fail Status */
+#define          CMD_TIMEOUT_STAT  0x4        /* CMD Time Out Status */
+#define          DAT_TIMEOUT_STAT  0x8        /* Data Time Out status */
+#define          TX_UNDERRUN_STAT  0x10       /* Transmit Underrun Status */
+#define           RX_OVERRUN_STAT  0x20       /* Receive Overrun Status */
+#define         CMD_RESP_END_STAT  0x40       /* CMD Response End Status */
+#define             CMD_SENT_STAT  0x80       /* CMD Sent Status */
+#define              DAT_END_STAT  0x100      /* Data End Status */
+#define        START_BIT_ERR_STAT  0x200      /* Start Bit Error Status */
+#define          DAT_BLK_END_STAT  0x400      /* Data Block End Status */
+
+/* Bit masks for SDH_MASK0 */
+#define         CMD_CRC_FAIL_MASK  0x1        /* CMD CRC Fail Mask */
+#define         DAT_CRC_FAIL_MASK  0x2        /* Data CRC Fail Mask */
+#define          CMD_TIMEOUT_MASK  0x4        /* CMD Time Out Mask */
+#define          DAT_TIMEOUT_MASK  0x8        /* Data Time Out Mask */
+#define          TX_UNDERRUN_MASK  0x10       /* Transmit Underrun Mask */
+#define           RX_OVERRUN_MASK  0x20       /* Receive Overrun Mask */
+#define         CMD_RESP_END_MASK  0x40       /* CMD Response End Mask */
+#define             CMD_SENT_MASK  0x80       /* CMD Sent Mask */
+#define              DAT_END_MASK  0x100      /* Data End Mask */
+#define        START_BIT_ERR_MASK  0x200      /* Start Bit Error Mask */
+#define          DAT_BLK_END_MASK  0x400      /* Data Block End Mask */
+#define              CMD_ACT_MASK  0x800      /* CMD Active Mask */
+#define               TX_ACT_MASK  0x1000     /* Transmit Active Mask */
+#define               RX_ACT_MASK  0x2000     /* Receive Active Mask */
+#define         TX_FIFO_STAT_MASK  0x4000     /* Transmit FIFO Status Mask */
+#define         RX_FIFO_STAT_MASK  0x8000     /* Receive FIFO Status Mask */
+#define         TX_FIFO_FULL_MASK  0x10000    /* Transmit FIFO Full Mask */
+#define         RX_FIFO_FULL_MASK  0x20000    /* Receive FIFO Full Mask */
+#define         TX_FIFO_ZERO_MASK  0x40000    /* Transmit FIFO Empty Mask */
+#define          RX_DAT_ZERO_MASK  0x80000    /* Receive FIFO Empty Mask */
+#define           TX_DAT_RDY_MASK  0x100000   /* Transmit Data Available Mask */
+#define          RX_FIFO_RDY_MASK  0x200000   /* Receive Data Available Mask */
+
+/* Bit masks for SDH_FIFO_CNT */
+#define                FIFO_COUNT  0x7fff     /* FIFO Count */
+
+/* Bit masks for SDH_E_STATUS */
+#define              SDIO_INT_DET  0x2        /* SDIO Int Detected */
+#define               SD_CARD_DET  0x10       /* SD Card Detect */
+
+/* Bit masks for SDH_E_MASK */
+#define                  SDIO_MSK  0x2        /* Mask SDIO Int Detected */
+#define                   SCD_MSK  0x40       /* Mask Card Detect */
+
+/* Bit masks for SDH_CFG */
+#define                   CLKS_EN  0x1        /* Clocks Enable */
+#define                      SD4E  0x4        /* SDIO 4-Bit Enable */
+#define                       MWE  0x8        /* Moving Window Enable */
+#define                    SD_RST  0x10       /* SDMMC Reset */
+#define                 PUP_SDDAT  0x20       /* Pull-up SD_DAT */
+#define                PUP_SDDAT3  0x40       /* Pull-up SD_DAT3 */
+#define                 PD_SDDAT3  0x80       /* Pull-down SD_DAT3 */
+
+/* Bit masks for SDH_RD_WAIT_EN */
+#define                       RWR  0x1        /* Read Wait Request */
+
+#endif
diff --git a/include/asm-blackfin/mmc.h b/include/asm-blackfin/mmc.h
new file mode 100644
index 0000000..aa2ac95
--- /dev/null
+++ b/include/asm-blackfin/mmc.h
@@ -0,0 +1 @@
+#include <asm-avr32/arch-at32ap700x/mmc.h>
diff --git a/include/asm-blackfin/posix_types.h b/include/asm-blackfin/posix_types.h
index 39651d2..000ffe5 100644
--- a/include/asm-blackfin/posix_types.h
+++ b/include/asm-blackfin/posix_types.h
@@ -40,15 +40,17 @@
 typedef unsigned short __kernel_nlink_t;
 typedef long __kernel_off_t;
 typedef int __kernel_pid_t;
-typedef unsigned short __kernel_ipc_pid_t;
-typedef unsigned short __kernel_uid_t;
-typedef unsigned short __kernel_gid_t;
-typedef unsigned int __kernel_size_t;
-typedef int __kernel_ssize_t;
+typedef unsigned int __kernel_ipc_pid_t;
+typedef unsigned int __kernel_uid_t;
+typedef unsigned int __kernel_gid_t;
+typedef unsigned long __kernel_size_t;
+typedef long __kernel_ssize_t;
 typedef int __kernel_ptrdiff_t;
 typedef long __kernel_time_t;
 typedef long __kernel_suseconds_t;
 typedef long __kernel_clock_t;
+typedef int __kernel_timer_t;
+typedef int __kernel_clockid_t;
 typedef int __kernel_daddr_t;
 typedef char *__kernel_caddr_t;
 typedef unsigned short __kernel_uid16_t;
@@ -67,14 +69,10 @@
 #endif
 
 typedef struct {
-#if defined(__KERNEL__) || defined(__USE_ALL)
 	int val[2];
-#else				/* !defined(__KERNEL__) && !defined(__USE_ALL) */
-	int __val[2];
-#endif				/* !defined(__KERNEL__) && !defined(__USE_ALL) */
 } __kernel_fsid_t;
 
-#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+#if defined(__KERNEL__)
 
 #undef	__FD_SET
 #define	__FD_SET(d, set)	((set)->fds_bits[__FDELT(d)] |= __FDMASK(d))
@@ -88,6 +86,6 @@
 #undef	__FD_ZERO
 #define __FD_ZERO(fdsetp) (memset (fdsetp, 0, sizeof(*(fd_set *)fdsetp)))
 
-#endif	/* defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) */
+#endif				/* defined(__KERNEL__) */
 
 #endif
diff --git a/lib_blackfin/Makefile b/lib_blackfin/Makefile
index fee0fda..46ef7f3 100644
--- a/lib_blackfin/Makefile
+++ b/lib_blackfin/Makefile
@@ -39,10 +39,10 @@
 COBJS-y	+= board.o
 COBJS-y	+= boot.o
 COBJS-y	+= cache.o
+COBJS-y	+= clocks.o
 COBJS-y	+= muldi3.o
-COBJS-y	+= post.o
+COBJS-$(CONFIG_POST) += post.o tests.o
 COBJS-y	+= string.o
-COBJS-y	+= tests.o
 
 SRCS	:= $(SOBJS-y:.o=.S) $(COBJS-y:.o=.c)
 OBJS	:= $(addprefix $(obj),$(SOBJS-y) $(COBJS-y))
diff --git a/lib_blackfin/board.c b/lib_blackfin/board.c
index 01b71d4..c223711 100644
--- a/lib_blackfin/board.c
+++ b/lib_blackfin/board.c
@@ -44,50 +44,6 @@
 #endif
 }
 
-/* Get the input voltage */
-static u_long get_vco(void)
-{
-	u_long msel;
-	u_long vco;
-
-	msel = (*pPLL_CTL >> 9) & 0x3F;
-	if (0 == msel)
-		msel = 64;
-
-	vco = CONFIG_CLKIN_HZ;
-	vco >>= (1 & *pPLL_CTL);	/* DF bit */
-	vco = msel * vco;
-	return vco;
-}
-
-/* Get the Core clock */
-u_long get_cclk(void)
-{
-	u_long csel, ssel;
-	if (*pPLL_STAT & 0x1)
-		return CONFIG_CLKIN_HZ;
-
-	ssel = *pPLL_DIV;
-	csel = ((ssel >> 4) & 0x03);
-	ssel &= 0xf;
-	if (ssel && ssel < (1 << csel))	/* SCLK > CCLK */
-		return get_vco() / ssel;
-	return get_vco() >> csel;
-}
-
-/* Get the System clock */
-u_long get_sclk(void)
-{
-	u_long ssel;
-
-	if (*pPLL_STAT & 0x1)
-		return CONFIG_CLKIN_HZ;
-
-	ssel = (*pPLL_DIV & 0xf);
-
-	return get_vco() / ssel;
-}
-
 static void *mem_malloc_start, *mem_malloc_end, *mem_malloc_brk;
 
 static void mem_malloc_init(void)
@@ -114,7 +70,11 @@
 static int display_banner(void)
 {
 	printf("\n\n%s\n\n", version_string);
-	printf("CPU:   ADSP " MK_STR(CONFIG_BFIN_CPU) " (Detected Rev: 0.%d)\n", bfin_revid());
+	printf("CPU:   ADSP " MK_STR(CONFIG_BFIN_CPU) " "
+		"(Detected Rev: 0.%d) "
+		"(%s boot)\n",
+		bfin_revid(),
+		get_bfin_boot_mode(CONFIG_BFIN_BOOT_MODE));
 	return 0;
 }
 
@@ -435,10 +395,19 @@
 #ifdef CONFIG_CMD_NET
 	printf("Net:   ");
 	eth_initialize(gd->bd);
-	if (getenv("ethaddr"))
+	if ((s = getenv("ethaddr"))) {
+# ifndef CONFIG_NET_MULTI
+		size_t i;
+		char *e;
+		for (i = 0; i < 6; ++i) {
+			bd->bi_enetaddr[i] = simple_strtoul(s, &e, 16);
+			s = (*e) ? e + 1 : e;
+		}
+# endif
 		printf("MAC:   %02X:%02X:%02X:%02X:%02X:%02X\n",
 			bd->bi_enetaddr[0], bd->bi_enetaddr[1], bd->bi_enetaddr[2],
 			bd->bi_enetaddr[3], bd->bi_enetaddr[4], bd->bi_enetaddr[5]);
+	}
 #endif
 
 	display_global_data();
diff --git a/lib_blackfin/cache.c b/lib_blackfin/cache.c
index 870c5bf..1557864 100644
--- a/lib_blackfin/cache.c
+++ b/lib_blackfin/cache.c
@@ -15,15 +15,25 @@
 
 void flush_cache(unsigned long addr, unsigned long size)
 {
+	void *start_addr, *end_addr;
+	int istatus, dstatus;
+
 	/* no need to flush stuff in on chip memory (L1/L2/etc...) */
 	if (addr >= 0xE0000000)
 		return;
 
-	if (icache_status())
-		blackfin_icache_flush_range((void *)addr, (void *)(addr + size));
+	start_addr = (void *)addr;
+	end_addr = (void *)(addr + size);
+	istatus = icache_status();
+	dstatus = dcache_status();
 
-	if (dcache_status())
-		blackfin_dcache_flush_range((void *)addr, (void *)(addr + size));
+	if (istatus) {
+		if (dstatus)
+			blackfin_icache_dcache_flush_range(start_addr, end_addr);
+		else
+			blackfin_icache_flush_range(start_addr, end_addr);
+	} else if (dstatus)
+		blackfin_dcache_flush_range(start_addr, end_addr);
 }
 
 void icache_enable(void)
diff --git a/lib_blackfin/clocks.c b/lib_blackfin/clocks.c
new file mode 100644
index 0000000..0be395b
--- /dev/null
+++ b/lib_blackfin/clocks.c
@@ -0,0 +1,77 @@
+/*
+ * clocks.c - figure out sclk/cclk/vco and such
+ *
+ * Copyright (c) 2005-2008 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <common.h>
+#include <asm/blackfin.h>
+
+/* Get the VCO frequency: CLKIN (halved when DF is set) times MSEL */
+static u_long cached_vco_pll_ctl, cached_vco;
+u_long get_vco(void)
+{
+	u_long ctl = bfin_read_PLL_CTL();
+	u_long mult;
+
+	/* PLL_CTL unchanged since the last computation: reuse the result */
+	if (ctl == cached_vco_pll_ctl)
+		return cached_vco;
+	cached_vco_pll_ctl = ctl;
+
+	/* multiplier is the 6-bit field at bits 14:9; 0 stands for 64 */
+	mult = (ctl >> 9) & 0x3F;
+	if (!mult)
+		mult = 64;
+
+	/* DF (bit 0) shifts CLKIN right by one before the multiply */
+	cached_vco = ((u_long)CONFIG_CLKIN_HZ >> (ctl & 1)) * mult;
+	return cached_vco;
+}
+
+/* Get the Core clock: VCO >> CSEL, or VCO / SSEL when SSEL is the smaller */
+static u_long cached_cclk_pll_div, cached_cclk;
+u_long get_cclk(void)
+{
+	u_long pll_div, csel, ssel;
+
+	/* PLL_STAT bit 0 set: report the input clock unchanged */
+	if (bfin_read_PLL_STAT() & 0x1)
+		return CONFIG_CLKIN_HZ;
+
+	/* PLL_DIV unchanged since the last computation: reuse the result */
+	pll_div = bfin_read_PLL_DIV();
+	if (pll_div == cached_cclk_pll_div)
+		return cached_cclk;
+	cached_cclk_pll_div = pll_div;
+
+	csel = (pll_div >> 4) & 0x03;
+	ssel = pll_div & 0xf;
+	/* SCLK > CCLK: a non-zero SSEL below 2^CSEL divides the VCO instead */
+	cached_cclk = (ssel && ssel < (1 << csel)) ?
+		get_vco() / ssel : get_vco() >> csel;
+	return cached_cclk;
+}
+
+/* Get the System clock: VCO / SSEL, or CLKIN if PLL_STAT bit 0 is set */
+static u_long cached_sclk_pll_div, cached_sclk;
+u_long get_sclk(void)
+{
+	u_long ssel;
+
+	if (bfin_read_PLL_STAT() & 0x1)
+		return CONFIG_CLKIN_HZ;
+
+	ssel = bfin_read_PLL_DIV();
+	if (ssel == cached_sclk_pll_div && cached_sclk)	/* valid cache hit */
+		return cached_sclk;
+	cached_sclk_pll_div = ssel;
+
+	ssel &= 0xf;
+	if (!ssel)	/* a zero SSEL field must not reach the division */
+		ssel = 1;
+	cached_sclk = get_vco() / ssel;
+	return cached_sclk;
+}
diff --git a/lib_blackfin/post.c b/lib_blackfin/post.c
index 4ab9e8b..35ccd3c 100644
--- a/lib_blackfin/post.c
+++ b/lib_blackfin/post.c
@@ -30,8 +30,6 @@
 #include <logbuff.h>
 #endif
 
-#ifdef CONFIG_POST
-
 DECLARE_GLOBAL_DATA_PTR;
 
 #define POST_MAX_NUMBER		32
@@ -421,5 +419,3 @@
 {
 	return (unsigned long)get_ticks() / (get_tbclk() / CONFIG_SYS_HZ) - base;
 }
-
-#endif				/* CONFIG_POST */
diff --git a/lib_blackfin/string.c b/lib_blackfin/string.c
index 36eecdf..12b6d24 100644
--- a/lib_blackfin/string.c
+++ b/lib_blackfin/string.c
@@ -136,6 +136,16 @@
  */
 void dma_memcpy_nocache(void *dst, const void *src, size_t count)
 {
+	uint16_t wdsize, mod;
+
+	/* Disable DMA in case it's still running (older u-boots did not
+	 * always turn it off).  Do it before the if statement below so
+	 * we can be cheap and not do a SSYNC() due to the forced abort.
+	 */
+	bfin_write_MDMA_D0_CONFIG(0);
+	bfin_write_MDMA_S0_CONFIG(0);
+	bfin_write_MDMA_D0_IRQ_STATUS(DMA_RUN | DMA_DONE | DMA_ERR);
+
 	/* Scratchpad cannot be a DMA source or destination */
 	if (((unsigned long)src >= L1_SRAM_SCRATCH &&
 	     (unsigned long)src < L1_SRAM_SCRATCH_END) ||
@@ -143,9 +153,18 @@
 	     (unsigned long)dst < L1_SRAM_SCRATCH_END))
 		hang();
 
-	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_RUN | DMA_DONE | DMA_ERR);
+	if (((unsigned long)dst | (unsigned long)src | count) & 0x1) {
+		wdsize = WDSIZE_8;
+		mod = 1;
+	} else if (((unsigned long)dst | (unsigned long)src | count) & 0x2) {
+		wdsize = WDSIZE_16;
+		count >>= 1;
+		mod = 2;
+	} else {
+		wdsize = WDSIZE_32;
+		count >>= 2;
+		mod = 4;
+	}
 
 	/* Copy sram functions from sdram to sram */
 	/* Setup destination start address */
@@ -153,25 +172,24 @@
 	/* Setup destination xcount */
 	bfin_write_MDMA_D0_X_COUNT(count);
 	/* Setup destination xmodify */
-	bfin_write_MDMA_D0_X_MODIFY(1);
+	bfin_write_MDMA_D0_X_MODIFY(mod);
 
 	/* Setup Source start address */
 	bfin_write_MDMA_S0_START_ADDR(src);
 	/* Setup Source xcount */
 	bfin_write_MDMA_S0_X_COUNT(count);
 	/* Setup Source xmodify */
-	bfin_write_MDMA_S0_X_MODIFY(1);
+	bfin_write_MDMA_S0_X_MODIFY(mod);
 
 	/* Enable source DMA */
-	bfin_write_MDMA_S0_CONFIG(DMAEN);
-
-	bfin_write_MDMA_D0_CONFIG(WNR | DMAEN);
+	bfin_write_MDMA_S0_CONFIG(wdsize | DMAEN);
+	bfin_write_MDMA_D0_CONFIG(wdsize | DMAEN | WNR | DI_EN);
 	SSYNC();
 
-	while (bfin_read_MDMA_D0_IRQ_STATUS() & DMA_RUN)
+	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE))
 		continue;
 
-	bfin_write_MDMA_D0_IRQ_STATUS(bfin_read_MDMA_D0_IRQ_STATUS() | DMA_RUN | DMA_DONE | DMA_ERR);
+	bfin_write_MDMA_D0_IRQ_STATUS(DMA_RUN | DMA_DONE | DMA_ERR);
 	bfin_write_MDMA_D0_CONFIG(0);
 	bfin_write_MDMA_S0_CONFIG(0);
 }
diff --git a/lib_blackfin/tests.c b/lib_blackfin/tests.c
index c2319ec..bf7fba0 100644
--- a/lib_blackfin/tests.c
+++ b/lib_blackfin/tests.c
@@ -27,7 +27,6 @@
 
 #include <common.h>
 #include <config.h>
-#ifdef CONFIG_POST
 
 #include <post.h>
 #define CONFIG_SYS_POST_FLASH  0x00004000
@@ -249,5 +248,3 @@
 };
 
 unsigned int post_list_size = sizeof(post_list) / sizeof(struct post_test);
-
-#endif				/* CONFIG_POST */