Transcript Assembly 09

Assembly 09
Outline
• Strings in x86
• esi, edi, ecx, eax
• stosb, stosw, stosd
• cld, std
• rep
• loop
1
Strings in x86
• x86 string: any contiguous group of bytes in memory
• Not necessarily characters only
• Can also be words, dwords
• Arbitrary size
2
Strings in x86
• Unlike strings in C++, Java, Python, etc.
• x86 strings have no length counter
• no .length()
• x86 strings have no boundary character
• C-style strings end in ‘\0’ (null terminator)
3
Strings in x86
• “Think of strings as the register values that define them.”
• “Assembly strings are wholly defined by values you place in
registers”
• Pointer to string’s address in memory
• Length of string in ecx
4
msg: db “THIS IS A STRING”, 10 ; in .data
len: equ $-msg
ptr: dd 0x00 ; declare 32-bit variable
mov eax, msg ; in .text (evaluate msg’s address)
mov dword [ptr], msg ; copy msg’s address to ptr
mov ebx, [ptr] ; evaluate ptr’s value
mov eax,4 ; write system call…
mov ebx,1 ;
mov ecx, [ptr] ; use address stored in ptr
mov edx, len ;
int 0x80
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
UNIX> ./a.out
THIS IS A STRING
UNIX>
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
eax
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
7
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
ebx
ptr
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
eax
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
8
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
ebx
ptr
0
msg’s address
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
eax
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
9
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
0x080490B8
ebx
ptr
0
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
eax
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
10
0x080490B8
ebx
ptr
0x080490B8
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
11
eax
0x080490B8
ebx
0x080490B8
ptr
0x080490B8
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
12
eax
0x080490B8
ebx
0x080490B8
ptr
0x080490B8
we use the 32-bit value in ptr
(the address of msg)
msg: db “THIS IS A STRING”, 10
len: equ $-msg
ptr: dd 0x00
UNIX> ./a.out
THIS IS A STRING
UNIX>
mov eax, msg
mov dword [ptr], msg
mov ebx, [ptr]
mov eax,4
mov ebx,1
mov ecx, [ptr]
mov edx, len
int 0x80
13
address stored in ptr works!
Outline
• Strings in x86
• esi, edi, ecx, eax
• stosb, stosw, stosd
• cld, std
• rep
• loop
14
esi, edi, ecx, eax
• The CPU assumes that registers esi, edi, ecx, and eax are used by
string-specific instructions:
• esi – source
• edi – destination
• ecx – string length
• eax – buffer between source / destination
15
buf: resb 1000 ; declare a 1000 byte string (in .bss)
; Fill buf string with ‘#’ (in .text)
mov edi, buf ; store buf’s address in edi
mov al, ‘#’ ; put character in al (low byte of eax)
mov ecx, 1000 ; put string length in ecx
_loop:
mov byte [edi], al ; put ‘#’ in memory pointed to by edi
inc edi ; edi + 1 points to next byte in buf
dec ecx ; decrement loop counter
jnz _loop ; if loop counter > 0, loop
;syscall to print buf
;syscall to exit
buf: resb 1000
; in .bss
; Fill buf string with ‘#’ (in .text)
mov edi, buf
mov al, ‘#’
mov ecx, 1000
_loop:
mov byte [edi], al
inc edi
dec ecx
jnz _loop
;syscall to print buf
;syscall to exit
UNIX> ./a.out
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#######################
#########...UNIX>
I ain’t no liar none!! (dag nabbit!!)
18
esi, edi, ecx, eax
• Is there a simpler way to do this common string manipulation?
_loop:
mov byte [edi], al
inc edi
dec ecx
jnz _loop
• Yes!! (duh)
19
Outline
• Strings in x86
• esi, edi, ecx, eax
• stosb, stosw, stosd
• cld, std
• rep
• loop
20
stosb
• stosb mnemonic: “Store String by Byte”
• stosb does the following:
1. copies byte al to memory at edi
2. increments edi
• stosb instruction takes no operands
• edi , al are implicit
21
stosb
_loop:
mov byte [edi], al
inc edi
dec ecx
jnz _loop
equivalent
_loop:
stosb
dec ecx
jnz _loop
22
stosw, stosd
• stosw – similar to stosb, but works with word strings
• Uses ax instead of al
• stosd – similar to stosb, but works with dword strings
• Uses eax instead of al
• Note: the role of ecx remains unchanged
• ecx is still the number of items in the string (not the number of bytes)
• E.g., 1000 bytes, 1000 words, 1000 dwords
23
buf: resd 1000 ; declare a 1000 dword string (in .bss)
; Fill buf string with 0xACEBEEF (in .text)
mov edi, buf ; store buf’s address in edi
mov ecx, 1000 ; put string length in ecx
mov eax,0xACEBEEF ; store some identifiable value
_loop:
stosd ; store eax in [edi] (buf)
dec ecx ; decrement loop counter
jnz _loop ; if loop counter > 0, loop
mov ebx,[buf + 500*4] ; examine 500th item (buf[500])
;syscall to exit
24
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
buf[0]
buf[1]
buf[…]
buf[500]
buf[…]
buf[999]
eax
ebx
ecx
25
…
edi
…
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
buf[0]
buf[1]
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
ebx
ecx
26
edi
buf
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
27
buf[0]
buf[1]
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
ebx
ecx
1000
edi
buf
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
28
buf[0]
buf[1]
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
0xACEBEEF
ebx
ecx
1000
edi
buf
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
29
buf[0]
0xACEBEEF
buf[1]
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
0xACEBEEF
ebx
ecx
1000
edi
buf+4
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
30
buf[0]
0xACEBEEF
buf[1]
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
0xACEBEEF
ebx
ecx
999
edi
buf+4
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
31
buf[0]
0xACEBEEF
buf[1]
0xACEBEEF
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
0xACEBEEF
ebx
ecx
999
edi
buf+8
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
32
buf[0]
0xACEBEEF
buf[1]
0xACEBEEF
buf[…]
…
buf[500]
buf[…]
…
buf[999]
eax
0xACEBEEF
ebx
ecx
998
edi
buf+8
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
0xACEBEEF
buf[1]
0xACEBEEF
buf[…]
…
buf[500]
buf[…]
loop continues
998 more times
(1000 total)
mov ebx,[buf + 500*4]
33
buf[0]
0xACEBEEF
…
buf[999]
0xACEBEEF
eax
0xACEBEEF
ebx
ecx
0
edi
buf+4000
buf: resd 1000
mov edi, buf
mov ecx, 1000
mov eax,0xACEBEEF
_loop:
stosd
dec ecx
jnz _loop
mov ebx,[buf + 500*4]
34
don’t forget how to access
dwords in memory…
buf[0]
0xACEBEEF
buf[1]
0xACEBEEF
buf[…]
…
buf[500]
0xACEBEEF
buf[…]
…
buf[999]
0xACEBEEF
eax
0xACEBEEF
ebx
0xACEBEEF
ecx
0
edi
buf+4000
Outline
• Strings in x86
• esi, edi, ecx, eax
• stosb, stosw, stosd
• cld, std
• rep
• loop
35
Direction Flag (DF)
• DF determines the direction of string instructions such as stosb
• DF clear: fill string “uphill”, low to high memory (default)
• edi gets incremented
• DF set: fill string “downhill”, high to low memory
• edi gets decremented
36
DF Commands
cld -> clear DF (cld takes no arguments)
std -> set DF (std takes no arguments)
37
buf: resb 10 ; declare string buffer of 10 bytes (in .bss)
mov edi, buf ; point edi to string buf (in .text)
mov ecx, 10 ; set loop counter to 10
mov al,’0’ ; put character 0 in al
cld ; clear DF to go “uphill” in memory
_loop:
stosb ; store al in [edi] (then edi++)
inc al ; change ‘0’ to ‘1’…
dec ecx ; decrement the loop counter
jnz _loop ; close the loop
;sys calls to write buf, write newline, and exit cleanly
buf: resb 10;
mov edi, buf;
mov ecx, 10;
mov al,’0’;
cld;
_loop:
stosb;
inc al;
dec ecx;
jnz _loop
;sys calls
39
UNIX> ./a.out
0123456789
UNIX>
start at BEGINNING of string
DF clear: go “uphill” from
low to high memory
buf: resb 10 ; declare string buffer of 10 bytes (in .bss)
mov edi, buf+10 ; point edi to end of string buf (in .text)
mov ecx, 10 ; set loop counter to 10
mov al,’0’ ; put character 0 in al
std ; set DF to go “downhill” in memory
_loop:
stosb ; store al in [edi] (then edi--)
inc al ; change ‘0’ to ‘1’…
dec ecx ; decrement the loop counter
jnz _loop ; close the loop
;sys calls to write buf, write newline, and exit cleanly
buf: resb 10;
mov edi, buf+10;
mov ecx, 10;
mov al,’0’;
std
_loop:
stosb;
inc al;
dec ecx;
jnz _loop
;sys calls
41
UNIX> ./a.out
987654321
UNIX>
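start at END of string
DF set: go “downhill” from
high to low memory
(Note: buf+10 is one byte past the last element, buf+9. stosb stores first and
then decrements edi, so the first store (‘0’) lands just outside buf and buf[0]
is never written, which is why only nine digits are printed. Start at buf+9 to
fill all ten bytes and print 9876543210.)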
Outline
• Strings in x86
• esi, edi, ecx, eax
• stosb, stosw, stosd
• cld, std
• rep
• loop
42
rep
• Is there an even more succinct way to do this?
_loop:
stosb
dec ecx
jnz _loop
equivalent
rep stosb
43
rep
• rep stosb
1. copy byte al to memory at address edi
2. increment (or decrement) edi
3. decrement ecx
4. if ecx > 0, jump back to copy instruction
rep => repeat
44
buf: resb 1000 ; declare a 1000 byte string (in .bss)
; Fill buf string with ‘$’ (in .text)
mov edi, buf ; store buf’s address in edi
mov al, ‘$’ ; put character in al (low byte of eax)
mov ecx, 1000 ; put string length in ecx
rep stosb ; single command to:
; copy al to [edi]
; increment edi
; decrement ecx
; compare ecx to 0 and jump
;syscall to print buf
;syscall to exit
buf: resb 1000
mov edi, buf
mov al, ‘$’;
mov ecx, 1000;
rep stosb
;syscall to print buf
;syscall to exit
UNIX> ./a.out
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$$$$$$$..
Outline
• Strings in x86
• esi, edi, ecx, eax
• stosb, stosw, stosd
• cld, std
• rep
• loop
47
loop Instruction
• Usage: loop <label>;
• loop does the following:
1) dec ecx
2) jnz label
anyone know what this is?
it’s used in geology…
48
loop Instruction
_myLoop:
inc al
dec ecx
jnz _myLoop
_myLoop:
inc al
loop _myLoop
equivalent
49