#6
by かずま » 8年前
usao さんが書きました:(2)(何らかの意味で)より良い実装方法(アルゴリズム?)を御存じの方がおられましたら,ご教示願いたいです.
コード:
int iMiddle = (iMax+1)%3;
int iMin = (iMax+2)%3;
より
コード:
int iMiddle = !iMax; // または iMiddle = (2 - iMax) >> 1;
int iMin = 3 - iMax - iMiddle;
のほうが、割り算しないから速いと思うんですが、
コード:
int get();
void func(int, int);
void f1()
{
int iMax = get();
int iMiddle = (iMax + 1) % 3;
int iMin = (iMax + 2) % 3;
func(iMiddle, iMin);
}
void f2()
{
int iMax = get();
int iMiddle = !iMax;
int iMin = 3 - iMiddle - iMax;
func(iMiddle, iMin);
}
void f3()
{
int iMax = get();
int iMiddle = (2 - iMax) >> 1;
int iMin = 3 - iMiddle - iMax;
func(iMiddle, iMin);
}
gcc -O2 -c hoge.c; objdump -d hoge.o の結果の抜粋
コード:
0000000000000000 <f1>:
0: 48 83 ec 28 sub $0x28,%rsp
4: e8 00 00 00 00 callq 9 <f1+0x9>
9: 44 8d 48 02 lea 0x2(%rax),%r9d
d: 41 b8 56 55 55 55 mov $0x55555556,%r8d
13: 89 c1 mov %eax,%ecx
15: 83 c1 01 add $0x1,%ecx
18: 44 89 c8 mov %r9d,%eax
1b: 41 f7 e8 imul %r8d
1e: 44 89 c8 mov %r9d,%eax
21: c1 f8 1f sar $0x1f,%eax
24: 29 c2 sub %eax,%edx
26: 8d 04 52 lea (%rdx,%rdx,2),%eax
29: 41 29 c1 sub %eax,%r9d
2c: 89 c8 mov %ecx,%eax
2e: 41 f7 e8 imul %r8d
31: 89 c8 mov %ecx,%eax
33: c1 f8 1f sar $0x1f,%eax
36: 29 c2 sub %eax,%edx
38: 8d 04 52 lea (%rdx,%rdx,2),%eax
3b: 44 89 ca mov %r9d,%edx
3e: 29 c1 sub %eax,%ecx
40: 48 83 c4 28 add $0x28,%rsp
44: e9 00 00 00 00 jmpq 49 <f1+0x49>
0000000000000050 <f2>:
50: 48 83 ec 28 sub $0x28,%rsp
54: e8 00 00 00 00 callq 59 <f2+0x9>
59: 31 c9 xor %ecx,%ecx
5b: 85 c0 test %eax,%eax
5d: ba 03 00 00 00 mov $0x3,%edx
62: 0f 94 c1 sete %cl
65: 29 ca sub %ecx,%edx
67: 29 c2 sub %eax,%edx
69: 48 83 c4 28 add $0x28,%rsp
6d: e9 00 00 00 00 jmpq 72 <f2+0x22>
0000000000000080 <f3>:
80: 48 83 ec 28 sub $0x28,%rsp
84: e8 00 00 00 00 callq 89 <f3+0x9>
89: b9 02 00 00 00 mov $0x2,%ecx
8e: ba 03 00 00 00 mov $0x3,%edx
93: 29 c1 sub %eax,%ecx
95: d1 f9 sar %ecx
97: 29 ca sub %ecx,%edx
99: 29 c2 sub %eax,%edx
9b: 48 83 c4 28 add $0x28,%rsp
9f: e9 00 00 00 00 jmpq a4 <f3+0x24>
VC++ で、cl -O2 -c hoge.c; dumpbin -disasm hoge.obj の結果の抜粋
コード:
_f1:
00000000: 56 push esi
00000001: E8 00 00 00 00 call _get
00000006: 8B C8 mov ecx,eax
00000008: BE 03 00 00 00 mov esi,3
0000000D: 8D 41 02 lea eax,[ecx+2]
00000010: 99 cdq
00000011: F7 FE idiv eax,esi
00000013: 8D 41 01 lea eax,[ecx+1]
00000016: 52 push edx
00000017: 99 cdq
00000018: F7 FE idiv eax,esi
0000001A: 52 push edx
0000001B: E8 00 00 00 00 call _func
00000020: 83 C4 08 add esp,8
00000023: 5E pop esi
00000024: C3 ret
_f2:
00000000: E8 00 00 00 00 call _get
00000005: 33 D2 xor edx,edx
00000007: B9 03 00 00 00 mov ecx,3
0000000C: 85 C0 test eax,eax
0000000E: 0F 94 C2 sete dl
00000011: 2B CA sub ecx,edx
00000013: 2B C8 sub ecx,eax
00000015: 51 push ecx
00000016: 52 push edx
00000017: E8 00 00 00 00 call _func
0000001C: 83 C4 08 add esp,8
0000001F: C3 ret
_f3:
00000000: E8 00 00 00 00 call _get
00000005: BA 02 00 00 00 mov edx,2
0000000A: B9 03 00 00 00 mov ecx,3
0000000F: 2B D0 sub edx,eax
00000011: D1 FA sar edx,1
00000013: 2B CA sub ecx,edx
00000015: 2B C8 sub ecx,eax
00000017: 51 push ecx
00000018: 52 push edx
00000019: E8 00 00 00 00 call _func
0000001E: 83 C4 08 add esp,8
00000021: C3 ret
[quote="usao" id=3,19211,145479](2)(何らかの意味で)より良い実装方法(アルゴリズム?)を御存じの方がおられましたら,ご教示願いたいです.[/quote]
[code]
int iMiddle = (iMax+1)%3;
int iMin = (iMax+2)%3;
[/code]
より
[code]
int iMiddle = !iMax; // または iMiddle = (2 - iMax) >> 1;
int iMin = 3 - iMax - iMiddle;
[/code]
のほうが、割り算しないから速いと思うんですが、
[code]
int get();
void func(int, int);
void f1()
{
int iMax = get();
int iMiddle = (iMax + 1) % 3;
int iMin = (iMax + 2) % 3;
func(iMiddle, iMin);
}
void f2()
{
int iMax = get();
int iMiddle = !iMax;
int iMin = 3 - iMiddle - iMax;
func(iMiddle, iMin);
}
void f3()
{
int iMax = get();
int iMiddle = (2 - iMax) >> 1;
int iMin = 3 - iMiddle - iMax;
func(iMiddle, iMin);
}
[/code]
gcc -O2 -c hoge.c; objdump -d hoge.o の結果の抜粋
[code=text]
0000000000000000 <f1>:
0: 48 83 ec 28 sub $0x28,%rsp
4: e8 00 00 00 00 callq 9 <f1+0x9>
9: 44 8d 48 02 lea 0x2(%rax),%r9d
d: 41 b8 56 55 55 55 mov $0x55555556,%r8d
13: 89 c1 mov %eax,%ecx
15: 83 c1 01 add $0x1,%ecx
18: 44 89 c8 mov %r9d,%eax
1b: 41 f7 e8 imul %r8d
1e: 44 89 c8 mov %r9d,%eax
21: c1 f8 1f sar $0x1f,%eax
24: 29 c2 sub %eax,%edx
26: 8d 04 52 lea (%rdx,%rdx,2),%eax
29: 41 29 c1 sub %eax,%r9d
2c: 89 c8 mov %ecx,%eax
2e: 41 f7 e8 imul %r8d
31: 89 c8 mov %ecx,%eax
33: c1 f8 1f sar $0x1f,%eax
36: 29 c2 sub %eax,%edx
38: 8d 04 52 lea (%rdx,%rdx,2),%eax
3b: 44 89 ca mov %r9d,%edx
3e: 29 c1 sub %eax,%ecx
40: 48 83 c4 28 add $0x28,%rsp
44: e9 00 00 00 00 jmpq 49 <f1+0x49>
0000000000000050 <f2>:
50: 48 83 ec 28 sub $0x28,%rsp
54: e8 00 00 00 00 callq 59 <f2+0x9>
59: 31 c9 xor %ecx,%ecx
5b: 85 c0 test %eax,%eax
5d: ba 03 00 00 00 mov $0x3,%edx
62: 0f 94 c1 sete %cl
65: 29 ca sub %ecx,%edx
67: 29 c2 sub %eax,%edx
69: 48 83 c4 28 add $0x28,%rsp
6d: e9 00 00 00 00 jmpq 72 <f2+0x22>
0000000000000080 <f3>:
80: 48 83 ec 28 sub $0x28,%rsp
84: e8 00 00 00 00 callq 89 <f3+0x9>
89: b9 02 00 00 00 mov $0x2,%ecx
8e: ba 03 00 00 00 mov $0x3,%edx
93: 29 c1 sub %eax,%ecx
95: d1 f9 sar %ecx
97: 29 ca sub %ecx,%edx
99: 29 c2 sub %eax,%edx
9b: 48 83 c4 28 add $0x28,%rsp
9f: e9 00 00 00 00 jmpq a4 <f3+0x24>
[/code]
VC++ で、cl -O2 -c hoge.c; dumpbin -disasm hoge.obj の結果の抜粋
[code=text]
_f1:
00000000: 56 push esi
00000001: E8 00 00 00 00 call _get
00000006: 8B C8 mov ecx,eax
00000008: BE 03 00 00 00 mov esi,3
0000000D: 8D 41 02 lea eax,[ecx+2]
00000010: 99 cdq
00000011: F7 FE idiv eax,esi
00000013: 8D 41 01 lea eax,[ecx+1]
00000016: 52 push edx
00000017: 99 cdq
00000018: F7 FE idiv eax,esi
0000001A: 52 push edx
0000001B: E8 00 00 00 00 call _func
00000020: 83 C4 08 add esp,8
00000023: 5E pop esi
00000024: C3 ret
_f2:
00000000: E8 00 00 00 00 call _get
00000005: 33 D2 xor edx,edx
00000007: B9 03 00 00 00 mov ecx,3
0000000C: 85 C0 test eax,eax
0000000E: 0F 94 C2 sete dl
00000011: 2B CA sub ecx,edx
00000013: 2B C8 sub ecx,eax
00000015: 51 push ecx
00000016: 52 push edx
00000017: E8 00 00 00 00 call _func
0000001C: 83 C4 08 add esp,8
0000001F: C3 ret
_f3:
00000000: E8 00 00 00 00 call _get
00000005: BA 02 00 00 00 mov edx,2
0000000A: B9 03 00 00 00 mov ecx,3
0000000F: 2B D0 sub edx,eax
00000011: D1 FA sar edx,1
00000013: 2B CA sub ecx,edx
00000015: 2B C8 sub ecx,eax
00000017: 51 push ecx
00000018: 52 push edx
00000019: E8 00 00 00 00 call _func
0000001E: 83 C4 08 add esp,8
00000021: C3 ret
[/code]