从5个字符串中插入字符在nasm汇编

huangapple go评论60阅读模式
英文:

Intercalate characters from 5 strings in assembly nasm

问题

我必须编写一个汇编程序,该程序将来自用户在键盘上输入的五个不同字符串的字符交错排列,例如,如果我有:

  • S1 : "Hello"
  • S2 : "Bye"
  • S3 : "Apple"
  • S4 : "Car"
  • S5 : "Tree"

结果将是:"HBACTeyparleprelloe"

到目前为止,我所做的就是可以交错相同大小的字符串,但我不知道如何使其适用于不同大小的字符串,并且是否有更好的方法。由于这是我编写的最初的汇编程序之一,我会感激您的帮助。

英文:

I have to code an assembly program that intercalates characters from five different strings that the user types on the keyboard, for example, if I had:

  • S1 : "Hello"
  • S2 : "Bye"
  • S3 : "Apple"
  • S4 : "Car"
  • S5 : "Tree"

it would result: "HBACTeyparleprelloe"

This is what I did so far, it can intercalate from stings with the same size, I don't know what to do to make it work for different sized strings and if there's a better way to do it. I would appreciate the help since it is one of my first assembly programs.

segment .data
instruccion db 'Ingrese 5 cadenas de igual longitud no mayores a 20 caracteres:',0x0A
lonI EQU ($-instruccion)

segment .bss
contador resb 1
cad1 resb 20
cad2 resb 20
cad3 resb 20
cad4 resb 20
cad5 resb 20
cad6 resb 101

segment .text
    global _start
_start:
    mov edx,lonI
    mov ecx,instruccion
    call imprimir

    mov edx,20d
    mov ecx,cad1
    call leer

    mov ecx,cad2
    call leer

    mov ecx,cad3
    call leer

    mov ecx,cad4
    call leer

    mov ecx,cad5
    call leer

    mov edi,cad1
    mov ecx,255
    mov eax,0Ah
    repne scasb
    mov eax,255
    inc ecx
    sub eax,ecx

    mov edi,cad6
    mov ecx,eax
    mov ebx,0
    ciclo:
        mov esi,cad1
        cld
        mov edx,ecx
        mov ecx,ebx
        cmp ebx,0
        jne THEN1
        je ELSE1
        THEN1:
            lodsb
            loop THEN1
        ELSE1:
            movsb

        mov esi,cad2
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN2
        je ELSE2
        THEN2:
            lodsb
            loop THEN2
        ELSE2:
            movsb

        mov esi,cad3
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN3
        je ELSE3
        THEN3:
            lodsb
            loop THEN3
        ELSE3:
            movsb

        mov esi,cad4
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN4
        je ELSE4
        THEN4:
            lodsb
            loop THEN4
        ELSE4:
            movsb

        mov esi,cad5
        cld
        mov ecx,ebx
        cmp ebx,0
        jne THEN5
        je ELSE5
        THEN5:
            lodsb
            loop THEN5
        ELSE5:
            movsb

        mov ecx,edx
        inc ebx
    loop ciclo

    mov eax,0Ah
    stosb
    mov edx,101d
    mov ecx,cad6
    call imprimir

    mov	eax,1	        ;system call number (sys_exit)
    int	0x80	        ;call kernel

    leer:
        mov ebx,0
        mov eax,3
        int 0x80
        ret

    imprimir:
        mov ebx,1
        mov eax,4
        int 0x80
        ret

答案1

得分: 1

> 这可能是打字错误,否则这将变成一个非常不愉快的练习!我会假设 "HBACTeyparleprelleoe"。

> 它可以将相同长度的字符串中的内容插入

您现在的代码似乎正确地完成了这一点,但为什么这样复杂呢?
如果当前索引(字符串中的偏移量)为0,您只需执行 movsb。如果当前索引不是0,因此您需要跳过,您可以使用(低效的)looplodsb 指令组合。有时人们会想知道为什么允许 rep lodsb 指令,嗯,在这里它们有一个使用情况。尽管不完全是这样,因为实际的解决方案是完全替换:

> mov esi,cad1
> cld
> mov ecx,ebx
> cmp ebx,0
> jne THEN1
> je ELSE1
> THEN1:
> lodsb
> loop THEN1
> ELSE1:
> movsb

完全替换为:

lea     esi, [cad1 + ebx]
movsb

或者也可以替换为:

movzx   eax, byte [cad1 + ebx]
stosb

> 我不知道怎么做才能使其适用于不同大小的字符串

下面我将介绍3种已经测试过的解决方案。

解决方案1

由于恰好有5个输入字符串,32位x86体系结构恰好有足够的寄存器来保持各自的指针在各自的寄存器中。这种方法可以提供最快的代码,但前提是各个字符串的长度不能相差太多。

S:      db      43 dup 0
S1:     db      "Hello", 10
S2:     db      "Bye", 10
S3:     db      "AppleADayKeepsTheDoctorAway", 10
S4:     db      "Car", 10
S5:     db      "Tree", 10

        ...

Begin:  mov     ebx, S1                 ; 输入字符串的地址
        mov     ecx, S2
        mov     edx, S3
        mov     esi, S4
        mov     edi, S5
        mov     ebp, S                  ; 输出字符串的地址
.a:     push    ebp                     ; (1)

        movzx   eax, byte [ebx]         ; 从此字符串读取一个字符
        cmp     al, 10                  ; 如果此字符串已用尽,则
        je      .b                      ;  不再添加到输出字符串中
        inc     ebx                     ; 转到该字符串中的下一个字符
        mov     [ebp], al               ; 将字符添加到输出字符串中
        inc     ebp

.b:     movzx   eax, byte [ecx]         ; 从此字符串读取一个字符
        cmp     al, 10                  ; 如果此字符串已用尽,则
        je      .c                      ;  不再添加到输出字符串中
        inc     ecx                     ; 转到该字符串中的下一个字符
        mov     [ebp], al               ; 将字符添加到输出字符串中
        inc     ebp

.c:     movzx   eax, byte [edx]         ; 从此字符串读取一个字符
        cmp     al, 10                  ; 如果此字符串已用尽,则
        je      .d                      ;  不再添加到输出字符串中
        inc     edx                     ; 转到该字符串中的下一个字符
        mov     [ebp], al               ; 将字符添加到输出字符串中
        inc     ebp

.d:     movzx   eax, byte [esi]         ; 从此字符串读取一个字符
        cmp     al, 10                  ; 如果此字符串已用尽,则
        je      .e                      ;  不再添加到输出字符串中
        inc     esi                     ; 转到该字符串中的下一个字符
        mov     [ebp], al               ; 将字符添加到输出字符串中
        inc     ebp

.e:     movzx   eax, byte [edi]         ; 从此字符串读取一个字符
        cmp     al, 10                  ; 如果此字符串已用尽,则
        je      .f                      ;  不再添加到输出字符串中
        inc     edi                     ; 转到该字符串中的下一个字符
        mov     [ebp], al               ; 将字符添加到输出字符串中
        inc     ebp

.f:     pop     eax                     ; (1)
        cmp     eax, ebp                ; 有任何东西添加到输出字符串中吗?
        jne     .a                      ; 是的,然后重复

解决方案2

进行一些微小的编辑即可处理任意数量的输入字符串。这种方法比之前慢,但是它需要对字符串进行填充,使它们具有相同的长度(就像您在问题中所做的那样)。

S:      db      43 dup 0
S1:     db      "Hello", 22 dup 10, 10
S2:     db      "Bye", 24 dup 10, 10
S3:     db      "AppleADayKeepsTheDoctorAway", 10
S4:     db      "Car", 24 dup 10, 10
S5:     db      "Tree", 23 dup 10, 10

        ...

Begin:  xor     ebx, ebx                ; 每个字符串中的当前偏移量
        mov     ebp, S                  ; 输出字符串的地址
.a:     push    ebp                     ; (1)

        movzx   eax, byte [S1 + ebx]    ; 从此字符串读取一个字符
        cmp     al, 10                  ; 如果此字符串已用尽,则
        je      .b                      ;  不再添加到输出字符串中
        mov     [ebp], al               ; 将字符添加到输出字符串中
        inc     ebp

.b:     movzx

<details>
<summary>英文:</summary>

&gt; it would result: &quot;HBACTeyparlepre**ll**oe&quot;

I sure hope this was a typo because otherwise this would become a very nasty exercise indeed! I will be assuming &quot;HBACTeyparlepre**lle**oe&quot;.

&gt; it can intercalate from stings **with the same size**

Your present code seems to do *that* correctly, but why is it so convoluted?  
If the current index (offset in the string) is 0, you just do `movsb`. And if the current index isn&#39;t 0, so you need to skip ahead, you do so with a (wasteful) `loop` of `lodsb` instructions. [Sometimes people wonder why `rep lodsb` is allowed](https://stackoverflow.com/questions/44486459/why-rep-lods-al-instruction-exists?), well here they have a bit of a use case. Although not really, since the practical solution would be to replace:

&gt;     mov esi,cad1
&gt;     cld
&gt;     mov ecx,ebx
&gt;     cmp ebx,0
&gt;     jne THEN1
&gt;     je ELSE1
&gt;     THEN1:
&gt;         lodsb
&gt;         loop THEN1
&gt;     ELSE1:
&gt;         movsb

entirely by:

    lea     esi, [cad1 + ebx]
    movsb

or alternatively by:

    movzx   eax, byte [cad1 + ebx]
    stosb


&gt; I don&#39;t know what to do to make it work for **different sized strings**

Below I will present 3 solutions, all tested.


Solution 1
-

Because there are 5 input strings precisely, the 32-bit x86 architecture has just the right number of registers to keep individual pointers in their own register. This approach gives the fastest code but only if the lengths of the individual strings don&#39;t differ by too much.


S: db 43 dup 0
S1: db "Hello", 10
S2: db "Bye", 10
S3: db "AppleADayKeepsTheDoctorAway", 10
S4: db "Car", 10
S5: db "Tree", 10

    ...

Begin: mov ebx, S1 ; Addresses of the input strings
mov ecx, S2
mov edx, S3
mov esi, S4
mov edi, S5
mov ebp, S ; Address of the output string
.a: push ebp ; (1)

    movzx   eax, byte [ebx]         ; Read a character from this string
    cmp     al, 10                  ; If this string is exhausted, then
    je      .b                      ;  no longer add to the output string
    inc     ebx                     ; Go to the next character in this string
    mov     [ebp], al               ; Add character to the output string
    inc     ebp

.b: movzx eax, byte [ecx] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .c ; no longer add to the output string
inc ecx ; Go to the next character in this string
mov [ebp], al ; Add character to the output string
inc ebp

.c: movzx eax, byte [edx] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .d ; no longer add to the output string
inc edx ; Go to the next character in this string
mov [ebp], al ; Add character to the output string
inc ebp

.d: movzx eax, byte [esi] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .e ; no longer add to the output string
inc esi ; Go to the next character in this string
mov [ebp], al ; Add character to the output string
inc ebp

.e: movzx eax, byte [edi] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .f ; no longer add to the output string
inc edi ; Go to the next character in this string
mov [ebp], al ; Add character to the output string
inc ebp

.f: pop eax ; (1)
cmp eax, ebp ; Was anything added to the output string ?
jne .a ; Yes, then repeat


Solution 2
-

A minor edit allows us to process *any* number of input strings. This approach is  slower than before, but it suffers from the necessity to pad the strings so they have the same lengths (like you had it in your question).


S: db 43 dup 0
S1: db "Hello", 22 dup 10, 10
S2: db "Bye", 24 dup 10, 10
S3: db "AppleADayKeepsTheDoctorAway", 10
S4: db "Car", 24 dup 10, 10
S5: db "Tree", 23 dup 10, 10

    ...

Begin: xor ebx, ebx ; Current offset in every string
mov ebp, S ; Address of the output string
.a: push ebp ; (1)

    movzx   eax, byte [S1 + ebx]    ; Read a character from this string
    cmp     al, 10                  ; If this string is exhausted, then
    je      .b                      ;  no longer add to the output string
    mov     [ebp], al               ; Add character to the output string
    inc     ebp

.b: movzx eax, byte [S2 + ebx] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .c ; no longer add to the output string
mov [ebp], al ; Add character to the output string
inc ebp

.c: movzx eax, byte [S3 + ebx] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .d ; no longer add to the output string
mov [ebp], al ; Add character to the output string
inc ebp

.d: movzx eax, byte [S4 + ebx] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .e ; no longer add to the output string
mov [ebp], al ; Add character to the output string
inc ebp

.e: movzx eax, byte [S5 + ebx] ; Read a character from this string
cmp al, 10 ; If this string is exhausted, then
je .f ; no longer add to the output string
mov [ebp], al ; Add character to the output string
inc ebp

.f: inc ebx ; Go to next character in every string
pop eax ; (1)
cmp eax, ebp ; Was anything added to the output string ?
jne .a ; Yes, then repeat


Solution 3
-

This time we create an array with pointers to the individual strings. These pointers get used in succession to retrieve a character from the associated string, and when we encounter the end-of-string marker (10), we simply remove the concerned pointer from the array. The other solutions kept dealing with an exhausted string, but here an exhausted string vanishes from the loop.  
Because this method has more housekeeping to do, it will run slower on your very regular test data. However once you feed it a more realistic data set, one with short and long strings, it will shine... There&#39;s also no limit on the number of input strings, padding is not required and neither is using same-size stringbuffers (like in your program).


P: dd S1, S2, S3, S4, S5, 0
S: db 43 dup 0
S1: db "Hello", 10
S2: db "Bye", 10
S3: db "AppleADayKeepsTheDoctorAway", 10
S4: db "Car", 10
S5: db "Tree", 10

    ...

Begin: mov ebp, S ; Address of the output string
jmp .e

.a: mov edi, ebx
.b: mov eax, [edi+4] ; Move all the stringpointers that follow
mov [edi], eax ; one position down in the array
add edi, 4
test eax, eax ; Until the zero-terminator got moved down
jnz .b
jmp .d ; Continue with the next stringpointer

.c: movzx eax, byte [esi] ; Read a character from the current string
cmp al, 10 ; If this string is exhausted, then
je .a ; go remove its pointer from the array
inc esi ; Go to the next character in the current string
mov [ebx], esi ; Update the current stringpointer
add ebx, 4 ; Go to the next stringpointer
mov [ebp], al ; Add character to the output string
inc ebp
.d: mov esi, [ebx] ; Get current stringpointer
test esi, esi ; Arrived at the end of the array if ESI is zero
jnz .c
.e: mov ebx, P ; Address of the array with stringpointers
mov esi, [ebx] ; Get current stringpointer
test esi, esi ; The array is empty if the 1st dword is zero
jnz .c


| method 1 | method 2 | method 3 | comment            |
|:--------:|:--------:|:--------:|:-------------------|
| 0.4 &#181;sec | 0.5 &#181;sec | 1.1 &#181;sec | 5 short strings    |
| 2.0 &#181;sec | 2.1 &#181;sec | 1.5 &#181;sec | with 1 long string |

Expected output:

&gt;     HBACTeyparleprelleoeADayKeepsTheDoctorAway

-----
-----

**[EDIT]**

Building upon the many ideas kindly provided by @PeterCordes through [comments](https://stackoverflow.com/questions/75981517/intercalate-characters-from-5-strings-in-assembly-nasm/76058098?noredirect=1#comment134141480_76058098), and throwing in a couple of new ideas of my own, I was able to write the following faster solutions. (I have dismissed the earlier solution 2 for the reason of the excessive padding that it requires.)

Solution 1b
-

*Switching the roles of EBP and EDI* as Peter suggested already improved the code by 25%. And adding instructions to *set the pointer of an exhausted string to zero*, so as to obtain a cheap way to no longer having to process the string, improved the code by another 20%. I did give `stosb` a chance, but abandoned the idea because it made the code run 17% slower.

S: db 43 dup 0
S1: db "Hello", 10
S2: db "Bye", 10
S3: db "AppleADayKeepsTheDoctorAway", 10
S4: db "Car", 10
S5: db "Tree", 10

    ...

    mov     ebx, S1
    mov     ecx, S2
    mov     edx, S3
    mov     esi, S4
    mov     ebp, S5
    mov     edi, S

.a: push edi

    test    ebx, ebx
    jz      .b
    movzx   eax, byte [ebx]
    cmp     al, 10
    je      .clr1
    inc     ebx
    mov     [edi], al
    inc     edi

.b: test ecx, ecx
jz .c
movzx eax, byte [ecx]
cmp al, 10
je .clr2
inc ecx
mov [edi], al
inc edi

.c: test edx, edx
jz .d
movzx eax, byte [edx]
cmp al, 10
je .clr3
inc edx
mov [edi], al
inc edi

.d: test esi, esi
jz .e
movzx eax, byte [esi]
cmp al, 10
je .clr4
inc esi
mov [edi], al
inc edi

.e: test ebp, ebp
jz .f
movzx eax, byte [ebp]
cmp al, 10
je .clr5
inc ebp
mov [edi], al
inc edi

.f: pop eax
cmp eax, edi
jne .a
...

.clr1: xor ebx, ebx
jmp .b
.clr2: xor ecx, ecx
jmp .c
.clr3: xor edx, edx
jmp .d
.clr4: xor esi, esi
jmp .e
.clr5: xor ebp, ebp
jmp .f


Solution 3b
-

The key improvements are:

- having the top of the inner loop (`.c`) *16-byte-aligned*
- maintaining *a count of pointers* instead of zero-terminating the array
- early-exiting so the remainder of the last-remaining string can *get copied verbatim*

The use of `stosb` didn&#39;t harm the execution time (only gain is codesize) and so I kept it this time.

P: dd S1, S2, S3, S4, S5
S: db 43 dup 0
S1: db "Hello", 10
S2: db "Bye", 10
S3: db "AppleADayKeepsTheDoctorAway", 10
S4: db "Car", 10
S5: db "Tree", 10

    ...

    mov     ebx, P          ; Address of the pointers array
    mov     esi, [ebx]
    mov     edi, S          ; Address of the destination string
    mov     ebp, 5          ; Number of remaining pointers
    mov     edx, ebp        ; Inner loop counter
    jmp     .c

    db      (16-($+21) and 15) dup 0 ; 16-byte aligning `.c`

.a: dec ebp
dec edx
jz .d ; Nothing to copy (is last pointer)
mov esi, ebx
mov ecx, edx
.b: mov eax, [esi+4]
mov [esi], eax
add esi, 4
dec ecx
jnz .b
mov esi, [ebx]

.c: movzx eax, byte [esi]
cmp al, 10
je .a
inc esi
mov [ebx], esi
add ebx, 4
stosb
mov esi, [ebx]
dec edx
jnz .c

.d: mov ebx, P
mov esi, [ebx]
mov edx, ebp
cmp ebp, 1
ja .c ; Continue while at least 2 strings remain
jb .f ; Done if none remains

    movzx   eax, byte [esi] ; Copy remainder of last-remaining string quickly
    cmp     al, 10
    je      .f

.e: inc esi
stosb
movzx eax, byte [esi]
cmp al, 10
jne .e
.f: ...


| Solution 1b       | Solution 3b       | Comment            |
|:-----------------:|:-----------------:|:-------------------|
| 0.3875 &#181;sec (0.4) | 0.6681 &#181;sec (1.1) | 5 short strings    |
| 1.1866 &#181;sec (2.0) | 0.9033 &#181;sec (1.5) | with 1 long string |

Solution 1 can deal with *at most 5* strings.  
Solution 3 can deal with *any number of* strings.


</details>



huangapple
  • 本文由 发表于 2023年4月11日 07:48:32
  • 转载请务必保留本文链接:https://go.coder-hub.com/75981517.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定