
# Test command:
  vlc -I dummy -V dummy --noa Dolby_Broadway.vob vlc:quit

# Modules really used:
  dummy ps yuvmmx idctmmxext motionmmxext

# Script:
  for i in `seq 1 10` ; do (time ./vlc -I dummy -V dummy --noa \
    ../streams/trailers/Dolby_Broadway.vob vlc:quit) 2>&1 \
    | grep total | sed 's/.*:quit//' ; done

# OK, we try vlc with everything compiled as plugins. Supposedly the slowest,
# since we have the dlopen overhead, and the -fPIC overhead. Results are
# surprising. It's damn fast.
# 
# PLUGINS := yuv idct idctclassic motion imdct downmix yuvmmx idctmmx motionmmx
#            idctmmxext motionmmxext imdct3dn imdctsse downmix3dn downmixsse dsp
#            sdl gtk x11 xvideo es ps ts dvd dummy null rc
# BUILTINS := 

  12.18s user 0.05s system 35% cpu 34.419 total
  12.00s user 0.03s system 34% cpu 34.437 total
  12.06s user 0.01s system 35% cpu 34.438 total
  11.93s user 0.04s system 34% cpu 34.438 total
  12.07s user 0.01s system 35% cpu 34.423 total
  12.01s user 0.02s system 34% cpu 34.438 total
  12.01s user 0.02s system 34% cpu 34.438 total
  12.03s user 0.06s system 35% cpu 34.438 total
  12.01s user 0.04s system 35% cpu 34.426 total
  12.01s user 0.02s system 34% cpu 34.438 total

# Now we put the CPU-inexpensive modules as builtins. As expected, the speed
# difference is negligible.
# 
# PLUGINS := yuv idct idctclassic motion imdct downmix yuvmmx idctmmx motionmmx
#            idctmmxext motionmmxext imdct3dn imdctsse downmix3dn downmixsse dsp
#            sdl gtk x11 xvideo
# BUILTINS := es ps ts dvd dummy null rc

  12.06s user 0.08s system 35% cpu 34.430 total
  12.08s user 0.01s system 35% cpu 34.420 total
  11.98s user 0.03s system 34% cpu 34.440 total
  12.01s user 0.05s system 35% cpu 34.437 total
  12.09s user 0.02s system 35% cpu 34.438 total
  12.02s user 0.03s system 34% cpu 34.438 total
  12.06s user 0.03s system 35% cpu 34.419 total
  12.01s user 0.07s system 35% cpu 34.418 total
  12.05s user 0.01s system 35% cpu 34.419 total
  12.06s user 0.03s system 35% cpu 34.419 total

# We now put almost everything as builtins, except the output and interface
# modules, which aren't called anyway. Big performance hit!
# 
# PLUGINS := dsp sdl gtk x11 xvideo
# BUILTINS := es ps ts dvd dummy null rc yuv idct idctclassic motion imdct
#             downmix yuvmmx idctmmx motionmmx idctmmxext motionmmxext imdct3dn
#             imdctsse downmix3dn downmixsse

  14.29s user 0.02s system 41% cpu 34.437 total
  14.28s user 0.05s system 41% cpu 34.427 total
  14.49s user 0.04s system 42% cpu 34.419 total
  14.25s user 0.03s system 41% cpu 34.436 total
  14.21s user 0.04s system 41% cpu 34.435 total
  14.49s user 0.06s system 42% cpu 34.436 total
  14.89s user 0.05s system 43% cpu 34.421 total
  15.00s user 0.03s system 43% cpu 34.424 total
  14.66s user 0.06s system 42% cpu 34.425 total
  14.50s user 0.06s system 42% cpu 34.424 total

# Same with everything as builtins. As expected, same performance hit.
#
# PLUGINS := 
# BUILTINS := es ps ts dvd dummy null rc yuv idct idctclassic motion imdct
#             downmix yuvmmx idctmmx motionmmx idctmmxext motionmmxext imdct3dn
#             imdctsse downmix3dn downmixsse dsp sdl gtk x11 xvideo

  14.34s user 0.04s system 41% cpu 34.426 total
  14.49s user 0.01s system 42% cpu 34.420 total
  14.32s user 0.04s system 41% cpu 34.421 total
  14.66s user 0.09s system 42% cpu 34.431 total
  14.58s user 0.06s system 42% cpu 34.436 total
  14.56s user 0.02s system 42% cpu 34.420 total
  14.49s user 0.08s system 42% cpu 34.421 total
  14.36s user 0.03s system 41% cpu 34.422 total
  14.63s user 0.08s system 42% cpu 34.420 total
  14.39s user 0.01s system 41% cpu 34.421 total

#
# ( We now remove all modules that aren't needed by our test bench. )
#

# Again, everything as plugins, we get the expected excellent results.
#
# PLUGINS := dummy ps yuvmmx idctmmxext motionmmxext
# BUILTINS := 

  12.12s user 0.00s system 35% cpu 34.423 total
  12.06s user 0.03s system 35% cpu 34.425 total
  12.23s user 0.08s system 35% cpu 34.420 total
  12.27s user 0.03s system 35% cpu 34.440 total
  11.95s user 0.04s system 34% cpu 34.420 total
  12.07s user 0.01s system 35% cpu 34.437 total
  12.12s user 0.08s system 35% cpu 34.418 total
  11.98s user 0.04s system 34% cpu 34.439 total
  12.03s user 0.03s system 35% cpu 34.423 total
  11.97s user 0.04s system 34% cpu 34.423 total

# Same as above, but with -fPIC disabled. First conclusion: there's
# almost no -fPIC overhead.
#
# PLUGINS := dummy ps yuvmmx idctmmxext motionmmxext
# BUILTINS := 

  12.03s user 0.02s system 35% cpu 34.420 total
  12.04s user 0.03s system 35% cpu 34.438 total
  12.01s user 0.00s system 34% cpu 34.418 total
  11.96s user 0.07s system 34% cpu 34.434 total
  11.92s user 0.05s system 34% cpu 34.438 total
  12.00s user 0.01s system 34% cpu 34.438 total
  11.89s user 0.07s system 34% cpu 34.420 total
  12.02s user 0.03s system 35% cpu 34.421 total
  11.87s user 0.04s system 34% cpu 34.437 total
  11.90s user 0.09s system 34% cpu 34.420 total

# We now put all these modules as builtins. Another
# surprise: it's even slower than with everything built in.
# 
# PLUGINS :=
# BUILTINS := dummy ps yuvmmx idctmmxext motionmmxext

  14.94s user 0.02s system 43% cpu 34.424 total
  14.99s user 0.06s system 43% cpu 34.424 total
  15.14s user 0.05s system 44% cpu 34.424 total
  14.97s user 0.04s system 43% cpu 34.423 total
  14.97s user 0.01s system 43% cpu 34.424 total
  15.10s user 0.04s system 43% cpu 34.424 total
  14.93s user 0.05s system 43% cpu 34.424 total
  15.20s user 0.03s system 44% cpu 34.424 total
  14.99s user 0.03s system 43% cpu 34.424 total
  15.12s user 0.03s system 44% cpu 34.424 total

# We now leave only the two CPU-critical modules built into vlc. A bit better,
# but still pretty crap.
# 
# PLUGINS := dummy ps yuvmmx
# BUILTINS := idctmmxext motionmmxext

  15.07s user 0.04s system 43% cpu 34.427 total
  14.91s user 0.02s system 43% cpu 34.437 total
  14.73s user 0.03s system 42% cpu 34.420 total
  14.72s user 0.02s system 42% cpu 34.424 total
  14.84s user 0.01s system 43% cpu 34.424 total
  14.74s user 0.09s system 43% cpu 34.420 total
  14.46s user 0.09s system 42% cpu 34.424 total
  14.57s user 0.09s system 42% cpu 34.437 total
  14.66s user 0.02s system 42% cpu 34.425 total
  14.94s user 0.06s system 43% cpu 34.423 total

# We swap the order of motionmmxext and idctmmxext in BUILTINS. Damn, it looks
# slower!
# 
# PLUGINS := dummy ps yuvmmx
# BUILTINS := motionmmxext idctmmxext

  15.13s user 0.06s system 44% cpu 34.433 total
  14.82s user 0.06s system 43% cpu 34.437 total
  15.24s user 0.07s system 44% cpu 34.420 total
  15.18s user 0.04s system 44% cpu 34.437 total
  15.09s user 0.03s system 43% cpu 34.420 total
  15.55s user 0.04s system 45% cpu 34.421 total
  14.95s user 0.05s system 43% cpu 34.419 total
  15.37s user 0.09s system 44% cpu 34.419 total
  15.25s user 0.04s system 44% cpu 34.420 total
  15.25s user 0.04s system 44% cpu 34.420 total

# We put motionmmxext in a plugin. And we get our performance back!
# 
# PLUGINS := dummy ps yuvmmx motionmmxext
# BUILTINS := idctmmxext

  12.04s user 0.05s system 35% cpu 34.420 total
  12.20s user 0.02s system 35% cpu 34.436 total
  12.02s user 0.04s system 35% cpu 34.419 total
  12.05s user 0.04s system 35% cpu 34.420 total
  11.98s user 0.03s system 34% cpu 34.428 total
  12.06s user 0.01s system 35% cpu 34.419 total
  11.93s user 0.06s system 34% cpu 34.420 total
  11.91s user 0.05s system 34% cpu 34.420 total
  11.95s user 0.04s system 34% cpu 34.420 total
  11.78s user 0.03s system 34% cpu 34.420 total

# Now we put idctmmxext in a plugin instead. Again, we get some performance
# back, but not as much as with motionmmxext in a plugin.
# 
# PLUGINS := dummy ps yuvmmx idctmmxext
# BUILTINS := motionmmxext

  13.66s user 0.05s system 39% cpu 34.425 total
  13.44s user 0.05s system 39% cpu 34.424 total
  13.39s user 0.02s system 38% cpu 34.423 total
  13.54s user 0.01s system 39% cpu 34.424 total
  13.69s user 0.06s system 39% cpu 34.420 total
  13.48s user 0.06s system 39% cpu 34.421 total
  13.61s user 0.11s system 39% cpu 34.421 total
  13.69s user 0.06s system 39% cpu 34.430 total
  13.75s user 0.05s system 40% cpu 34.419 total
  13.72s user 0.07s system 40% cpu 34.419 total

# We put everything back as builtins except motionmmxext, which stays a
# plugin. Crap performance again.
#
# PLUGINS := motionmmxext
# BUILTINS := idctmmxext dummy ps yuvmmx

  13.44s user 0.05s system 38% cpu 35.496 total
  13.56s user 0.01s system 39% cpu 34.425 total
  13.53s user 0.03s system 39% cpu 34.426 total
  13.66s user 0.02s system 39% cpu 34.425 total
  13.45s user 0.05s system 39% cpu 34.425 total
  13.58s user 0.07s system 39% cpu 34.433 total
  13.33s user 0.04s system 38% cpu 34.425 total
  13.68s user 0.06s system 39% cpu 34.425 total
  13.44s user 0.06s system 39% cpu 34.426 total
  13.66s user 0.06s system 39% cpu 34.425 total

# Only idctmmxext and dummy as builtins, the rest as plugins. Performance hit
# again.
# 
# PLUGINS := motionmmxext ps yuvmmx
# BUILTINS := idctmmxext dummy 

  13.49s user 0.04s system 39% cpu 34.424 total
  13.14s user 0.05s system 38% cpu 34.423 total
  13.51s user 0.06s system 39% cpu 34.423 total
  13.78s user 0.04s system 40% cpu 34.425 total
  13.69s user 0.07s system 39% cpu 34.419 total
  13.46s user 0.08s system 39% cpu 34.420 total
  13.65s user 0.03s system 39% cpu 34.423 total
  13.88s user 0.04s system 40% cpu 34.418 total
  13.88s user 0.04s system 40% cpu 34.423 total
  13.61s user 0.03s system 39% cpu 34.431 total

# Now we put everything we need as plugins, and the rest as builtins.
# Performance is back to the level of the all-plugins builds.
# 
# PLUGINS := motionmmxext ps yuvmmx idctmmxext dummy
# BUILTINS := es ts dvd null rc yuv idct idctclassic motion imdct downmix idctmmx motionmmx imdct3dn imdctsse downmix3dn downmixsse dsp sdl gtk x11 xvideo

  12.34s user 0.02s system 35% cpu 34.438 total
  12.05s user 0.02s system 35% cpu 34.436 total
  12.04s user 0.03s system 35% cpu 34.436 total
  12.47s user 0.04s system 36% cpu 34.425 total
  12.25s user 0.06s system 35% cpu 34.426 total
  12.17s user 0.04s system 35% cpu 34.425 total
  12.06s user 0.03s system 35% cpu 34.437 total
  11.93s user 0.03s system 34% cpu 34.438 total
  12.13s user 0.04s system 35% cpu 34.427 total
  12.06s user 0.05s system 35% cpu 34.425 total


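# OK, now let's try to change the link order of the builtins (see the position
# of lib/motionmmxext.a and lib/idctmmxext.a in the link command below).
# With this order, the builtins are as fast as the plugins.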
# 
# PLUGINS := yuvmmx ps dummy
# BUILTINS := motionmmxext idctmmxext
# gcc  -DSYS_LINUX -D_FILE_OFFSET_BITS=64 -D__USE_UNIX98   -Iinclude -Iextras -I/usr/local/include -Wall -Winline -D_REENTRANT -D_GNU_SOURCE -O3 -ffast-math -funroll-loops -fomit-frame-pointer -march=pentiumpro -mcpu=pentiumpro -o vlc src/misc/modules.o src/interface/main.o src/interface/interface.o src/interface/intf_msg.o src/interface/intf_playlist.o src/interface/intf_channels.o src/input/input.o src/input/input_ext-dec.o src/input/input_ext-intf.o src/input/input_dec.o src/input/input_programs.o src/input/input_netlist.o src/input/input_clock.o src/input/mpeg_system.o src/video_output/video_output.o src/video_output/video_text.o src/video_output/video_spu.o src/video_output/video_yuv.o src/audio_output/audio_output.o src/audio_output/aout_ext-dec.o src/audio_output/aout_u8.o src/audio_output/aout_s8.o src/audio_output/aout_u16.o src/audio_output/aout_s16.o src/audio_output/aout_spdif.o src/ac3_decoder/ac3_decoder_thread.o src/ac3_decoder/ac3_decoder.o src/ac3_decoder/ac3_parse.o src/ac3_decoder/ac3_exponent.o src/ac3_decoder/ac3_bit_allocate.o src/ac3_decoder/ac3_mantissa.o src/ac3_decoder/ac3_rematrix.o src/ac3_decoder/ac3_imdct.o src/ac3_spdif/ac3_spdif.o src/ac3_spdif/ac3_iec958.o src/lpcm_decoder/lpcm_decoder_thread.o src/lpcm_decoder/lpcm_decoder.o src/audio_decoder/audio_decoder.o src/audio_decoder/adec_generic.o src/audio_decoder/adec_layer1.o src/audio_decoder/adec_layer2.o src/audio_decoder/adec_math.o src/spu_decoder/spu_decoder.o lib/motionmmxext.a src/video_parser/video_parser.o src/video_parser/vpar_headers.o src/video_parser/vpar_blocks.o src/video_parser/vpar_synchro.o src/video_parser/video_fifo.o src/video_decoder/video_decoder.o src/misc/mtime.o src/misc/tests.o src/misc/netutils.o lib/idctmmxext.a -ldl -lpthread -L/usr/local/lib $LIB_MOTIONMMXEXT $LIB_IDCTMMXEXT -Wall -ldl -lpthread -L/usr/local/lib $LIB_MOTIONMMXEXT $LIB_IDCTMMXEXT

  11.91s user 0.05s system 34% cpu 34.425 total
  11.95s user 0.03s system 34% cpu 34.424 total
  11.91s user 0.01s system 34% cpu 34.424 total
  11.87s user 0.03s system 34% cpu 34.425 total
  11.95s user 0.04s system 34% cpu 34.438 total
  11.96s user 0.02s system 34% cpu 34.439 total
  11.98s user 0.04s system 34% cpu 34.420 total
  11.97s user 0.02s system 34% cpu 34.420 total
  11.86s user 0.05s system 34% cpu 34.424 total
  11.83s user 0.04s system 34% cpu 34.424 total


