[PATCH] fix root cause of NAND trouble

Werner Almesberger werner at openmoko.org
Sun Nov 2 14:25:05 CET 2008


Ben Dooks wrote:
> actually, thinking about it, we can probably get better code by doing:
> 
> 	/* mop up any non-word aligned length reads. */
> 	for (i = (len & ~3); i != len; i++)
> 		ptr[i] = readb(info->regs + S3C2440_NFDATA);

It looks nicer but, surprisingly, the generated code is one instruction
longer and executes three more instructions after the readsl call (in the
normal case, i.e., when the length is word-aligned); see the two listings
below (both built with the "buf" fix).

Notation:
	; <running instruction count> +<running count of instructions executed
	;  after readsl returns, in the word-aligned case>

- Werner

----- for (i = 0; i != (len & 3); i++) ------------------------------------

@ Compiler output for s3c2440_nand_read_buf with the tail loop written as
@ "for (i = 0; i != (len & 3); i++)".
@ In:  r0 = first argument (a pointer is loaded from offset 536 of it)
@      r1 = destination buffer
@      r2 = len (treated as a signed value by the division below)
@ Flow: call __raw_readsl with a count of len / 4, then store the remaining
@ len & 3 bytes one at a time after the word-aligned part of the buffer.
s3c2440_nand_read_buf:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 1, uses_anonymous_args = 0
	@ Prologue: push r4-r6, fp, ip (= entry sp), lr and pc, point fp at the
	@ highest pushed word, then reserve one extra word on the stack.
	mov	ip, sp					; 1
	stmfd	sp!, {r4, r5, r6, fp, ip, lr, pc}	; 2
	sub	fp, ip, #4				; 3
	sub	sp, sp, #4				; 4
	@ r6 = word at [r0 + 536]; presumably the driver's "info" pointer, since
	@ [r6 + 96] is used below where the C source uses info->regs (confirm
	@ against the struct layout).
	ldr	r6, [r0, #536]				; 5
	@ r4 = len, kept for the tail loop after the call.
	mov	r4, r2					; 6
	ldr	r0, [r6, #96]				; 7
	@ r2 = len / 4 as a signed division: add 3 first when len is negative so
	@ that the arithmetic shift rounds toward zero.
	cmp	r4, #0					; 8
	add	r2, r2, #3				; 9
	movge	r2, r4					; 10
	mov	r2, r2, asr #2				; 11
	@ r3 = len & ~3 (length rounded down to a multiple of four).
	bic	r3, r4, #3				; 12
	@ r0 = [r6 + 96] + 16; presumably info->regs + S3C2440_NFDATA.
	add	r0, r0, #16				; 13
	@ r5 = buf + (len & ~3): where the tail bytes go. Computed before the
	@ call, so nothing needs recomputing afterwards.
	add	r5, r1, r3				; 14
	@ Call with r0 = data register address, r1 = buf, r2 = len / 4.
	bl	__raw_readsl				; 15
	@ Tail loop: r2 = i, starting at 0; jump straight to the loop test.
	mov	r2, #0					; 16	+1
	b	.L212					; 17	+2
.L213:
	@ Loop body: reload the pointer at [r6 + 96], read one byte from offset
	@ 16 of it, store it at r5[i], then i++.
	ldr	r3, [r6, #96]				; 18
	ldrb	r3, [r3, #16]	@ zero_extendqisi2	; 19
	strb	r3, [r5, r2]				; 20
	add	r2, r2, #1				; 21
.L212:
	@ Loop test: continue while i != (len & 3); the mask is recomputed on
	@ every pass.
	and	r3, r4, #3				; 22	+3
	cmp	r2, r3					; 23	+4
	bne	.L213					; 24	+5
	@ Epilogue: sp points at the extra word, which lands in r3 (discarded);
	@ then restore r4-r6 and fp, reload sp from the saved ip, and return by
	@ loading the saved lr into pc.
	ldmfd	sp, {r3, r4, r5, r6, fp, sp, pc}	; 25	+6

----- for (i = (len & ~3); i != len; i++) ---------------------------------

@ Compiler output for s3c2440_nand_read_buf with the tail loop written as
@ "for (i = (len & ~3); i != len; i++)".
@ In:  r0 = first argument (a pointer is loaded from offset 536 of it)
@      r1 = destination buffer
@      r2 = len (treated as a signed value by the division below)
@ Flow: call __raw_readsl with a count of len / 4, then store the remaining
@ bytes one at a time after the word-aligned part of the buffer.
s3c2440_nand_read_buf:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 1, uses_anonymous_args = 0
	@ Prologue: push r4-r6, fp, ip (= entry sp), lr and pc, point fp at the
	@ highest pushed word, then reserve one extra word on the stack.
	mov	ip, sp					; 1
	stmfd	sp!, {r4, r5, r6, fp, ip, lr, pc}	; 2
	sub	fp, ip, #4				; 3
	sub	sp, sp, #4				; 4
	@ r6 = word at [r0 + 536]; presumably the driver's "info" pointer, since
	@ [r6 + 96] is used below where the C source uses info->regs (confirm
	@ against the struct layout).
	ldr	r6, [r0, #536]				; 5
	@ r5 = len, kept for the tail loop after the call.
	mov	r5, r2					; 6
	ldr	r0, [r6, #96]				; 7
	@ r2 = len / 4 as a signed division: add 3 first when len is negative so
	@ that the arithmetic shift rounds toward zero.
	cmp	r5, #0					; 8
	add	r2, r2, #3				; 9
	movge	r2, r5					; 10
	mov	r2, r2, asr #2				; 11
	@ r0 = [r6 + 96] + 16; presumably info->regs + S3C2440_NFDATA.
	add	r0, r0, #16				; 12
	@ r4 = buf, copied out of r1 for use after the call.
	mov	r4, r1					; 13
	@ Call with r0 = data register address, r1 = buf, r2 = len / 4.
	bl	__raw_readsl				; 14
	@ Unlike the other variant, the tail pointer is computed after the call:
	@ r1 = len & ~3, r4 = buf + (len & ~3). These two instructions run even
	@ when there are no tail bytes.
	bic	r1, r5, #3				; 15	+1
	add	r4, r4, r1				; 16	+2
	@ Tail loop: r2 counts from 0; jump straight to the loop test.
	mov	r2, #0					; 17	+3
	b	.L212					; 18	+4
.L213:
	@ Loop body: reload the pointer at [r6 + 96], read one byte from offset
	@ 16 of it, store it through r4 with post-increment.
	ldr	r3, [r6, #96]				; 19
	ldrb	r3, [r3, #16]	@ zero_extendqisi2	; 20
	strb	r3, [r4], #1				; 21
.L212:
	@ Loop test: r3 = len - (len & ~3), the number of tail bytes, recomputed
	@ on every pass. The counter increment sits between cmp and bne; it does
	@ not set flags, so bne still acts on the cmp result.
	rsb	r3, r1, r5				; 22	+5
	cmp	r2, r3					; 23	+6
	add	r2, r2, #1				; 24	+7
	bne	.L213					; 25	+8
	@ Epilogue: sp points at the extra word, which lands in r3 (discarded);
	@ then restore r4-r6 and fp, reload sp from the saved ip, and return by
	@ loading the saved lr into pc.
	ldmfd	sp, {r3, r4, r5, r6, fp, sp, pc}	; 26	+9




More information about the openmoko-kernel mailing list