这次更新第3章习题。

参考资料:

https://dreamanddead.github.io/CSAPP-3e-Solutions/chapter3/

https://cs.nyu.edu/wies/teaching/cso-fa19/class13_machine.pdf

Chapter 3

难题:3.59

3.58

汇编代码:

decode2: 
	subq 		%rdx, %rsi		# y = y - z
	imulq 		%rsi, %rdi		# x = x * y
	movq 		%rsi, %rax 		# tmp = y
	salq 		$63, %rax		# tmp = tmp << 63
	sarq 		$63, %rax		# tmp = tmp >> 63
	xorq 		%rdi, %rax		# tmp = tmp ^ x
	ret

对于移位操作,如果tmp为偶数,那么

(tmp << 63) >> 63 = 0...0

否则,

(tmp << 63) >> 63 = 1...1

因此C代码如下:

long decode2(long x, long y, long z){
    y = y - z;
    x = x * y;
    if (y & 1){
        return ~x;
    }else{
        return x;
    }
}

3.59

C代码:

typedef __int128 int128_t;

void store_prod(int128_t *dest, int64_t x, int64_t y) {
	*dest = x * (int128_t) y;
}

首先看数学关系:

因此:

注意到如果$x < 0$,那么

如果$x\ge 0$,那么

接着看汇编代码(注意参数分别存储在%rdi, %rsi, %rdx):

store_prod:
	movq	%rdx, %rax				# t1 = y
	cqto	          				# y, t1 = t1的128位符号拓展 	(y = yh, t1 = yl)
	movq	%rsi, %rcx				# t2 = x	
	sarq	$63, %rcx				# t2 = t2 >> 63	  			(if xh < 0, t2 = -1, else t2 = 0, 即t2 = xh)
	imulq	%rax, %rcx				# t2 = (t2 * t1) mod 2^64	 (t2 = xh * yl mod 2^64)
	imulq	%rsi, %rdx				# y = (y * x) mod 2^64		 (y = yh * xl mod 2^64)
	addq	%rdx, %rcx				# t2 = t2 + y				(t2 = xh * yl + yh * xl)
	mulq	%rsi      				# y, t1 = t1 * x = yl * xl	 (y = yl * xl / 2^64, t1 = yl * xl mod 2^64)
	addq	%rcx, %rdx				# y = y + t2				(y = (yh * xl + xh * yl + yl * xl / 2^64) mode 2^64)
	movq	%rax, (%rdi)			# *dest = t1 				(yl * xl mode 2^64)
	movq	%rdx, 8(%rdi)			# *(dest + 8) = y 			 ((yh * xl + xh * yl + yl * xl / 2^64) mode 2^64)
	ret

3.60

汇编代码:

long loop(long x, int n)
x in %rdi, n in %esi
loop:
	movl	%esi, %ecx		#t1 = n
	movl	$1,	%edx  		#t2 = 1
	movl	$0,	%eax  		#t3 = 0
	jmp		.L2     		#goto .L2
.L3:
	movq	%rdi, %r8		#t4 = x
	andq	%rdx, %r8		#t4 = t4 & t2
	orq		%r8, %rax		#t3 = t3 | t4
	salq	%cl, %rdx		#t2 = t2 << (low 8 bit of t1)
.L2:
	testq	%rdx, %rdx		#t2
	jne		.L3       		#if t2 != 0, then goto .L3
	rep:	ret	      		#return t3

对应C代码:

long loop(long x, int n)
{
    long result = 0;
    long mask;
    for (mask = 1; mask != 0; mask = (mask << (x & 0xFF))){
        result |= (x & mask) ;
    }

    return result;
}

3.61

C代码:

long cread_alt(long *xp){
    return ((!xp) ? 0 : *xp);
}

对应的汇编代码:

cread_alt:
	movq	$0,	%rax    	#v = 0
	testq	%rdi, %rdi		#test xp
	cmovne	(%rdi),	%rax	#if xp != NULL, v = *xp
	ret

3.62

汇编代码:

p1 in %rdi, p2 in %rsi, action in %edx
.L8: 													MODE_E
	movl	$27, %eax  						#t1 = 27
	ret
.L3:													MODE_A
	movq	(%rsi), %rax					#t1 = *p2
	movq	(%rdi), %rdx					#action = *p1
	movq	%rdx, (%rsi)					#*p2 = action
	ret
.L5:													MODE_B
	movq	(%rdi), %rax					#t1 = *p1
	addq	(%rsi), %rax					#t1 = t1 + *p2
	movq	%rax, (%rdi)					#*p1 = result
	ret
.L6:													MODE_C
	movq	$59, (%rdi)						#*p1 = 59
	movq	(%rsi), %rax					#t1 = *p2
	ret
.L7:													MODE_D
	movq	(%rsi), %rax					#t1 = *p2
	movq	%rax, (%rdi)					#*p1 = t1
	movl	$27, %eax   					#t1 = 27
	ret
.L7:													default
	movl	$12, %eax   					#t1 = 12
	ret

对应的C代码:

/* Enumerated type creates set of constants numbered 0 and upward */ 
typedef enum {MODE_A, MODE_B, MODE_C, MODE_D, MODE_E} mode_t;

long switch3(long *p1, long *p2, mode_t action)
{
	long result = 0; 
  	switch (action) { 
      	case MODE_A:
            result = *p2;
            action = *p1;
            *p2 = action;
            break;
        case MODE_B:
            result = *p1;
            result += *p2;
            *p1 = result;
            break;
        case MODE_C:
            *p1 = 59;
            result = *p2;
            break;
        case MODE_D:
            result = *p2;
            *p1 = result;
            result = 27;
            break;
        case MODE_E:
            result = 27;
            break;
       	default:
            result = 12;
    }
  
	return result;
}

3.63

C代码:

long switch_prob(long x, long n ) {
    long result = x;
    switch(n) {
        case 60:
        case 62:
            result = 8 * x;
            break;
        case 63:
            result = (x >> 3);
            break;
        case 64:
            result = x;
            result <<= 4;
            result -= x;
            x = result;
        case 65:
            x *= x;
        default:
            result = 75 + x;
            break;
    }
  
  	return result;
}

gdb结果:

(gdb) x/6gx 0x4006f8
0x4006f8:		0x00000000004005a1(n=60)	0x00000000004005c3(n=61, default)
0x400708:		0x00000000004005a1(n=62)	0x00000000004005aa(n=63)
0x400718:		0x00000000004005b2(n=64)	0x00000000004005bf(n=65)

汇编代码

long switch. prob(long x, 1ong n)
x in %rdi, n in %rsi
0000000000400590 <switch_prob>:
	400590:	48 83 ee 3c         	sub		$0x3c,%rsi                	#n -= 60
	400594:	48 83 fe 05         	cmp		$0x5,%rsi                 	#tmp = n - 5
	400598:	77 29               	ja		4005c3 <switch_ prob+0x33>	#if tmp > 0, goto 4005c3
	40059a:	ff 24 f5 f8 06 40 00	jmpq	*0x4006f8(,%rsi,8)        	#goto *[0x4006f8 + 8n]
	4005a1:	48 8d 04 fd 00 00 00	lea		0x0(,%rdi,8),%rax         	#t1 = 8 * x
	4005a8:	00
	4005a9:	c3                  	retq					    	   #return
	4005aa:	48 89 f8            	mov		%rdi,%rax                 	 #t1 = x
	4005ad:	48 c1 f8 03         	sar		$0x3,%rax			     	#t1 >>= 3
	4005b1:	c3		          	    retq							   #return
	4005b2:	48 89 f8             	mov		%rdi,%rax				  	#t1 = x
	4005b5:	48 c1 e0 04         	shl		$0x4,%rax				 	#t1 <<= 4
	4005b9:	48 29 f8            	sub		%rdi,%rax				  	#t1 -= x
	4005bc:	48 89 c7            	mov		%rax,%rdi					#x = t1
	4005bf:	48 0f af ff	        	imul	%rdi,%rdi					#x *= x
	4005c3: 48 8d 47 4b	        	lea		0x4b(%rdi),%rax				 #t1 = 0x4b + x
	4005c7:	c3			       	   retq								   #return

3.64

C代码:

long A[R][S][T];

long store_ele(long i, long j, long k, long *dest)
{
  *dest = A[i] [j] [k];
  return sizeof (A) ;
}

汇编代码:

long store_ele(long i, 1ong j, 1ong k, long *dest)
i in %rdi, j in %rsi, k in %rdx, dest in %rcx

store_ele:
	leaq	(%rsi,%rsi,2), %rax			#t1 = 3 * j
	leaq	(%rsi,%rax,4), %rax			#t1 = j + 4 * t1; t1 = 13j
	movq	%rdi, %rsi					#j = i
	salq	$6, %rsi		  			#j <<= 6; j = 64j = 64i
	addq	%rsi, %rdi					#i = i + j; i = 65i
	addq	%rax, %rdi					#i = i + t1; i = 65i + 13j
	addq	%rdi, %rdx					#k = k + i; k = 65i + 13j + k
	movq	A(,%rdx,8), %rax			#t1 = *(A + 8 * k)
	movq	%rax, (%rcx)				#*dest = rax
	movl	$3640, %eax					#t1 = 3640
	ret

推广214页的等式3.1可得:

对于

D A[R][S][T]

其地址为

对于此例

因此

结合

sizeof(A) = 3640

可得

3.65

C代码:

void transpose(long A[M] [M]) {
    long i, j;
    for(i = 0; i < M; i++)
        for(j = O; j < i; j++){
            long t = A[i][j];
            A[i][j] = A[j][i] ;
            A[j][i] = t;
        }
}

汇编代码:

#%rcx: t1, %rdx: t2, %rsi: t3, %rax: t4, %rdi: t5
.L6:
	movq		(%rdx), %rcx			#t1 = *t2
	movq		(%rax), %rsi			#t3 = *t4
	movq		%rsi, (%rdx)			#*t2 = t3
	movq		%rcx, (%rax)			#*t4 = t1
	addq		$8, %rdx				#t2 += 8
	addq		$120, %rax				#t4 += 120
	cmpq		%rdi, %rax				#t4 - t5
	jne			.L6						#if t4 - t5 != 0, goto .L6

注意到如下更新方式

t2 += 8

这说明$\%\text{rdx}$保存着指向$\text{A[i][j]}$的指针,因此$\text{t4}(\%\text{rax})$保存着指向$\text{A[j][i]}$的指针。

最后注意如下更新方式

t4 += 120

因此

3.66

C代码:

long sum_co1(long n, long A[NR(n)][NC(n)], long j) {
	long i;
	long result = 0;
	for (i = 0; i < NR(n); i++)
		result += A[i][j];
	return result;
}

汇编代码:

long sum_col(long n, long A[NR(n)][CNC(n)], long j)
n in %rdi, A in %rsi, j in %rdx
sum_col:
	leaq	1(,%rdi,4), %r8			 #r8 = 1 + 4 * n
	leaq	(%rdi,%rdi,2), %rax		 #rax = 3 * n
	movq	%rax, %rdi				#rdi = rax = 3 * n
	testq	%rax, %rax				#test rax
	jle		.L4					   #if rax <= 0, goto L4
	salq	$3, %r8				    #r8 = r8 << 3 = 8 * r8
	leaq	(%rsi,%rdx,8), %rcx		 #rcx = rsi + 8 * rdx = A + 8 * j
	movl	$0, %eax				#rax = 0 (result = 0)
	movl	$0, %edx				#rdx = 0 (i = 0)
.L3:
	addq	(%rcx), %rax			#rax = rax + *rcx
	addq	$1, %rdx				#rdx = rdx + 1
	addq	%r8, %rcx				#rcx = rcx + r8 = rcx + 8 * r8
	cmpq	%rdi, %rdx				#cmp rdx - rdi (cmp i - 3 * n)
	jne		.L3					    #if rdx - rdi != 0
	rep; ret
.L4:
	movl	$0, %eax				#rax = 0
ret

根据注释可得

NR(n) = 3 * n
CNC(n) = 1 + 4 * n

3.67

参考资料:

https://dreamanddead.github.io/CSAPP-3e-Solutions/chapter3/3.67/

C代码:

typedef struct {
	long a[2];
	long* p;
}strA;

typedef struct {
    long u[2];
    long q;
}strB;

strB process(strA s) {
	strB r;
	r.u[0] = s.a[1];
	r.u[1] = s.a[0];
	r.q = *s.P;
    return r;
}

long eval (1ong x, long y, long z){
	strA s;
	s.a[0] = x;
	s.a[1] = y;
	s.p = &z;
	strB r = process(s);
	return r.u[0] + r.u[1] + r.q;
)

内存中的布局:

strA:
[a[0]][a[1]][*p]
[8][8][8]

strB:
[u[0]][u[1]][q]
[8][8][8]

汇编代码:

strB process(strA s)
process:
	movq	%rdi, %rax		#t1 = r
	movq	24(%rsp), %rdx	#t2 = *(rsp + 24) (s.p)
	movq	(%rdx), %rdx	#t2 = *t2 = *(s.p)
	movq	16(%rsp), %rcx	#t3 = *(rsp + 16) (s.a[1])
	movq	%rcx, (%rdi)	#r.u[0] = t3 = s.a[1]
	movq	8(%rsp), %rcx	#t3 = *(rsp + 8)  (s.a[0])
	movq	%rcx, 8(%rdi)	#r.u[1] = t3 = s.a[0]
	movg	%rdx, 16(%rdi)	#r.q = t2 = *(s.p)
	ret
	
long eval(long x,long y, long z)
x in %rdi, y in %rsi, z in %rdx
eval:
	subq	$104, %rsp			#rsp = rsp - 104		(分配空间)
	movq	%rdx, 24(%rsp)		#*(rsp + 24) = z
	leaq	24(%rsp), %rax		#t1 = rsp + 24			(t1 = &z)
	movq	%rdi, (%rsp)		#*rsp = x 				(s.a[0] = x)
	movq	%rsi, 8(%rsp)		#*(rsp + 8) = y			(s.a[1] = y)
	movq	%rax, 16(%rsp)		#*(rsp + 16) = z		(s.p = &z)
	leaq	64(%rsp), %rdi		#rdi = *(rsp + 64)		(参数一)
	call	process
	movq	72(%rsp), %rax		#t1 = *(rsp + 72)		(t1 = r.u[1])
	addq	64(%rsp), %rax		#t1 = t1 + *(rsp + 64)	(t1 = t1 + r.u[0])
	addq	80(%rsp), %rax		#t1 = t1 + *(rsp + 80)	(t1 = t1 + r.q)
	addq	$104, %rsp			#rsp = rsp + 104
	ret
(a)
rsp + 104
rsp + 64 rdi
rsp + 24 z
rsp + 16 s.p = &z
rsp + 8 s.a[1] = y
rsp s.a[0] = x
(b)

%rdi(*(rsp + 64))

(c)

通过%rsp + 8, +16, + 24

(d)

通过%rsp + 64, +72, +80

(e)
rsp + 104
rsp + 80 r.q
rsp + 72 r.u[1]
rsp + 64 r.u[0]
rsp + 24 z
rsp + 16 s.p = &z
rsp + 8 s.a[1] = y
rsp(eval) s.a[0] = x
rsp - 8(rsp in process)
(f)

结构的值是在栈中传递。