[準備/原創]:親身體驗多核/超線程
背包問題,我寫了兩個版本的算法程序,bag 和 bag-mt 其中後者是多線程版本,其
使用 4 線程分段計算驗證,編寫語言是 ASM ,詳情請向下觀賞:
概述:
1 我的程序對內存訪問也是比較頻繁密集的,還有就是 xeon 是負載服務器,上麵
還有N個後台進程在駐留,可能可這個也有關係。
2 為了加速,我比較多的使用局部變量,省去臨界區,互斥事件的耗時。最後匯總。
源碼:
bag:
.386
.model flat,stdcall
option casemap:none
include c:/masm32/include/windows.inc
include c:/masm32/include/user32.inc
includelib c:/masm32/lib/user32.lib
include c:/masm32/include/kernel32.inc
includelib c:/masm32/lib/kernel32.lib
CDim equ 28
.const
wetdb220,56,79,135,21,5,175,90,11,153;KG
db25,68,123,35,8,94,207,2,67,159
db78,34,43,12,167,231,12,54,98,32
db65,146,57,234,65,93,56,243,8,56
valdb60,100,45,26,90,10,57,240,136,230;$
db78,34,7,23,89,250,129,94,45,24
db56,89,34,124,78,5,45,234,89,32
db34,72,50,147,180,210,45,18,69,94
limwetdd400;Limit Wet
twodd2
ftdb'TTs == %d ,MaxWet == %d , MaxVal == %d ,TakeTime == %d',0
cpdb'Windows XP',0
.data?
MaxValdd?
MaxWetdd?
nbndd?
tmptimedd?
TTsdd?
bufdb256dup(?)
.code
;~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
EnumCtproc
localTmpWet
locali
movnbn,1
movTmpWet,0
.whilenbn<=0fffffffh;2^CDim - 1
movi,0
xorebx,ebx
.whilei<CDim
moveax,i
btnbn,eax
.ifCARRY?
moval,wet[eax]
andeax,0ffh
addebx,eax
.endif
inci
.endw
.ifebx > limwet
jmpa0
.endif
movTmpWet,ebx
movi,0
xorebx,ebx
.whilei<CDim
moveax,i
btnbn,eax
.ifCARRY?
moval,val[eax]
andeax,0ffh
addebx,eax
.endif
inci
.endw
.ifebx > MaxVal
movMaxVal,ebx
movebx,TmpWet
movMaxWet,ebx
.endif
a0:
incTTs
incnbn
.endw
ret
EnumCtendp
;~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
start:
movTTs,0
movMaxVal,0
movMaxWet,0
invokeGetTickCount
movtmptime,eax
invokeEnumCt
invokeGetTickCount
subeax,tmptime
invokewsprintf,addr buf,addr ft,TTs,MaxWet,MaxVal,eax
invokeMessageBox,NULL,addr buf,addr cp,MB_OK
invokeExitProcess,NULL
;~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
endstart
bag-mt:
.386
.model flat,stdcall
option casemap:none
includec:/masm32/include/windows.inc
includec:/masm32/include/user32.inc
includelibc:/masm32/lib/user32.lib
includec:/masm32/include/kernel32.inc
includelibc:/masm32/lib/kernel32.lib
CDimequ28
cststruct
sfdd?
efdd?
ttsdd?
mwdd?
mvdd?
cstends
.const
wetdb220,56,79,135,21,5,175,90,11,153;KG
db25,68,123,35,8,94,207,2,67,159
db78,34,43,12,167,231,12,54,98,32
db65,146,57,234,65,93,56,243,8,56
valdb60,100,45,26,90,10,57,240,136,230;$
db78,34,7,23,89,250,129,94,45,24
db56,89,34,124,78,5,45,234,89,32
db34,72,50,147,180,210,45,18,69,94
limwetdd400;Limit Wet
ft0db'TTs == %d ,MaxWet == %d , MaxVal == %d ',0
ft1db'Total TakeTime == %d',0
cpdb'Windows XP',0
.data
cst0cst<1,4000000h,0,0,0>
cst1cst<4000001h,8000000h,0,0,0>
cst2cst<8000001h,0c000000h,0,0,0>
cst3cst<0c000001h,0fffffffh,0,0,0>
.data?
tmptimedd?
ht0dd?
ht1dd?
ht2dd?
ht3dd?
bufdb256dup(?)
.code
;~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
EnumCtproclpcst
localTmpWet
localTmpnbn
locali
movedi,lpcst
assumeedi:ptr cst
push [edi].sf
popTmpnbn
movTmpWet,0
movedx,[edi].ef
.whileTmpnbn<=edx;2^CDim - 1
movi,0
xorebx,ebx
.whilei<CDim
moveax,i
btTmpnbn,eax
.ifCARRY?
moval,wet[eax]
andeax,0ffh
addebx,eax
.endif
inci
.endw
.ifebx > limwet
jmpa0
.endif
movTmpWet,ebx
movi,0
xorebx,ebx
.whilei<CDim
moveax,i
btTmpnbn,eax
.ifCARRY?
moval,val[eax]
andeax,0ffh
addebx,eax
.endif
inci
.endw
.ifebx > [edi].mv
mov[edi].mv,ebx
movebx,TmpWet
mov[edi].mw,ebx
.endif
a0:
inc[edi].tts
incTmpnbn
.endw
ret
EnumCtendp
;~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
start:
invokeGetTickCount
movtmptime,eax
invokeCreateThread,NULL,0,addr EnumCt,addr cst0,/
NULL,NULL
movht0,eax
invokeCreateThread,NULL,0,addr EnumCt,addr cst1,/
NULL,NULL
movht1,eax
invokeCreateThread,NULL,0,addr EnumCt,addr cst2,/
NULL,NULL
movht2,eax
invokeCreateThread,NULL,0,addr EnumCt,addr cst3,/
NULL,NULL
movht3,eax
invokeWaitForMultipleObjects,4,addr ht0,TRUE,INFINITE
invokeGetTickCount
subeax,tmptime
movtmptime,eax
invokewsprintf,addr buf,addr ft0,cst0.tts,cst0.mw,cst0.mv
invokeMessageBox,NULL,addr buf,addr cp,MB_OK
invokewsprintf,addr buf,addr ft0,cst1.tts,cst1.mw,cst1.mv
invokeMessageBox,NULL,addr buf,addr cp,MB_OK
invokewsprintf,addr buf,addr ft0,cst2.tts,cst2.mw,cst2.mv
invokeMessageBox,NULL,addr buf,addr cp,MB_OK
invokewsprintf,addr buf,addr ft0,cst3.tts,cst3.mw,cst3.mv
invokeMessageBox,NULL,addr buf,addr cp,MB_OK
invokewsprintf,addr buf,addr ft1,tmptime
invokeMessageBox,NULL,addr buf,addr cp,MB_OK
invokeExitProcess,NULL
;~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
endstart
對於 bag-mt 略作說明:
a 采用的是 4 線程'並發'
b 使用了 對象等待 win32 API
以上代碼均用 masm32v8.2 編譯連接通過,其編譯連接指令為:
ml /c /coff name.asm
link /subsystem:console name.obj
歡迎大家用編譯連接後的 exe 作測試,並貼上測試結果,同時給出cpu類型
masm32 編譯器請到:
www.masm32.com 或 www.aogosoft.com 下載,有問題請和我聯係。
最後更新:2017-04-02 00:06:17