From: Mika Pekka Liljeberg (liljeber@kruuna.Helsinki.FI)
Date: 06/07/92


From: liljeber@kruuna.Helsinki.FI (Mika Pekka Liljeberg)
Subject: Re: gcc2 woes
Date: 7 Jun 1992 09:51:48 GMT

In article <60591@hydra.gatech.EDU> gt7080a@prism.gatech.EDU (Nathan Laredo) wrote:
> Now that was short, sweet and to the point... almost as good as I'd
> write it in assembler. Now look at this:
>
> My 486 under Linux with gcc -O6 -S test.c:
>
> .file "test.c"
> gcc2_compiled.:
> .text
> LC0:
> .ascii "start\0"
> LC1:
> .ascii "done\0"
> .align 2
> .globl _main
> _main:
> pushl %ebp
> movl %esp,%ebp
> call ___main
> pushl $__cout_sbuf
> pushl $LC0
> call _fputs
> addl $8,%esp
> cmpl $-1,%eax
> je L3
> movl __cout_sbuf+24,%edx
> cmpl %edx,__cout_sbuf+20
> jb L5
> pushl $10
> pushl $__cout_sbuf
> call ___overflow
> addl $8,%esp
> jmp L3
> L5:
> movl __cout_sbuf+20,%eax
> movb $10,(%eax)
> incl __cout_sbuf+20
> .align 2,0x90
> L3:
> movl $9999999,%eax
> L9:
> decl %eax
> jns L9
> pushl $__cout_sbuf
> pushl $LC1
> call _fputs
> addl $8,%esp
> cmpl $-1,%eax
> je L12
> movl __cout_sbuf+24,%edx
> cmpl %edx,__cout_sbuf+20
> jb L14
> pushl $10
> pushl $__cout_sbuf
> call ___overflow
> leave
> ret
> .align 2,0x90
> L14:
> movl __cout_sbuf+20,%eax
> movb $10,(%eax)
> incl __cout_sbuf+20
> .align 2,0x90
> L12:
> leave
> ret
>
> It's trash! Look at how wasteful it is... In fact, I

What you're looking at is speed optimization, not size optimization.
These two goals are usually in conflict and a reasonable compromise
is hard to find.

Here's the culprit in this particular case (excerpt from stdio.h):
=======================================================================
/*
 * Macro versions of common stdio calls.  This whole section is compiled
 * only when __OPTIMIZE__ is defined; with it undefined, none of the
 * macros below exist (presumably the ordinary library functions are
 * used instead -- not shown in this excerpt).
 *
 * NOTE: getc() and putc() mention their stream argument several times,
 * so a stream expression with side effects is evaluated more than once.
 */
#ifdef __OPTIMIZE__

/*
 * getc: if the get pointer has reached _egptr, call __underflow(); when
 * that returns EOF the macro yields EOF.  In every other case it yields
 * the byte at _gptr (as unsigned char) and advances _gptr.
 * Presumably __underflow() refills the buffer without consuming the
 * character -- verify against its definition.
 */
#define getc(fp) ((fp)->_gptr >= (fp)->_egptr && \
                                __underflow((struct streambuf*)(fp)) \
                                == EOF ? EOF \
                                : *(unsigned char*)(fp)->_gptr++)
#define getchar() getc(stdin)
/*
 * putc: if the put pointer has reached _epptr, hand the character (cast
 * to unsigned char) to __overflow() and yield its result; otherwise
 * store the character at _pptr, advance _pptr, and yield the stored
 * value cast to unsigned char.
 */
#define putc(c,fp) (((fp)->_pptr >= (fp)->_epptr) ? \
                                __overflow((struct streambuf*)(fp), \
                                (unsigned char) (c)) \
                                : (unsigned char)(*(fp)->_pptr++ = (c)))
#define putchar(c) putc(c, stdout)
/* clearerr: clear both the error-seen and EOF-seen bits in _flags. */
#define clearerr(stream) ((stream)->_flags &= \
                                ~(STDIO_S_ERR_SEEN | STDIO_S_EOF_SEEN))
/* feof: yields EOF (not 1) when the EOF-seen bit is set, 0 otherwise. */
#define feof(stream) (((stream)->_flags & \
                                STDIO_S_EOF_SEEN) ? EOF : 0)
/* ferror: yields 1 when the error-seen bit is set, 0 otherwise. */
#define ferror(stream) (((stream)->_flags & \
                                STDIO_S_ERR_SEEN) != 0)
#define fgetc(stream) getc((stream))
#define fputc(c,stream) putc((c),(stream))
/*
 * puts: call fputs() on stdout, then emit '\n' through the putc() macro
 * above, so every puts() call site expands to the fputs() call plus the
 * inline buffer check.  Yields EOF if either step yields EOF, else 0.
 */
#define puts(s) ((fputs((s), stdout) == EOF || \
                                putc('\n', stdout) == EOF) ? EOF : 0)

#endif /* __OPTIMIZE__ */
============================================================================
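Note the definition of puts(): each call expands inline into an fputs()
call followed by the putc() buffer check, which is exactly the sequence
that appears twice in the assembly listing above.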

Now, try compiling the following:

/*
 * This is a test: print "start", run an empty counting loop, then
 * print "done".
 *
 * __OPTIMIZE__ is undefined before <stdio.h> is included so that any
 * section of the header guarded by #ifdef __OPTIMIZE__ is skipped, even
 * when the compiler is invoked with optimization enabled.
 */

#undef __OPTIMIZE__
#include <stdio.h>

/* Returns 0 on completion. */
int main(void)
{
        int i;

        puts("start");
        /* Intentionally empty body: the loop only counts. */
        for (i = 0; i < 10000000; i++)
                ;
        puts("done");
        return 0;
}

You'll get the same output as from Sequent, almost word for word.
Whether expanding stdio functions inline is worth it, I can't say.
Depends on the application, I guess.

I hope this clears things up.

        Mika