From: Laco Rusnak (laco@Olympia)
Date: 08/05/93


From: laco@Olympia (Laco Rusnak)
Subject: memory & cache test program
Date: Thu, 5 Aug 1993 07:57:26 GMT

Here is a small test program for getting information and performance of
main memory and cache in the PC. Sorry, I haven't got english version,
but I believe that it exist. It is a MS-DOG program for Turbo Pascal,
as you can see.

Bye, Laco.

=======================================================================

Program cachemess; {TP > 5.0, c't 1/93 as }
{$A+,S-,R-,N+} { nicht im virtuellen Modus starten }
const maxblocksize=$2000;
const tickertime=12/14.31818e6; { Zeit fr einen Timertick}
Type proztyp = (unbekannt,i8088,i8086,V20,V30,i80188,i80186,i286,
                  i386DX,i386SX,i386SL,ibm386SLC,ct38600,ct38605,
                  i486DX,i486SX,i487DX,i486DX2,
                cyr486slc,cyr486dlc,cyr486S2,ibm486SLC2,Pentium);

Const prozname:array [0..22] of string[12] =
  ('unbekannt','i8088','i8086','V20','V30','i80188','i80186','i286',
   'i386DX','i386SX','i386SL','ibm386SLC','ct38600','ct38605',
   'i486DX','i486SX','i487DX','i486DX2',
   'cyr486slc','cyr486dlc','cyr486S2','ibm486SLC2','Pentium');

Const aamtab:array[0..22] of byte=
(0,77,77,15,15,19,19,16,17,17,17,17,14,14,15,15,15,15,17,17,17,15,00);
Type coproztyp = (nix,i87,i287,i387,i487); { hier nur die Inteltypen }
Type feld = array[1..maxblocksize div 2 ] of word; { 8-K-Bl”cke}
Type feldptr=^feld;

const block0=0; block1=1; block2=2;
      block8:byte=0; { L2-Cache-Antagonist von 0, (=8 bei 64K Cache }
      block9:byte=0; { L2-Cache_Antagonist von 1, (=9 bei 64K Cache }

var bsize :word;
    x :array[0..65] of feldptr; { maximal 512+16 KByte}
    X0,X1,X2,XP :feldptr;
    t :array[0..50] of real;
    writeback, dirtyok :boolean;
var cachesize :byte;
var messproc :procedure;
var load :array[0..16] of real;
{!} var L1_cachesize,L2_cachesize :longint;

Function SMSW:word;inline ($0F/01/$E0);

Procedure Trash (loop,inloop,delta:word; start:pointer);
begin
  inline
  ($1E/ { PUSH DS }
  $8B/$8E/loop/ { MOV CX,loop }
  $8B/$B6/inloop/ { MOV SI,inloop }
  $C4/$BE/start/ { LES DI,start }
  $8B/$96/delta/ { MOV DX,delta }
  {@outloop:}
  $89/$F3/ { MOV BX,SI }
  $8C/$C0/ { MOV AX,ES }
  {@inloop: ; innere Schleife }
  $8E/$D8/ { MOV DS,AX ; inkrementiert Adresse }
  $8B/$05/ { MOV AX,[DI] ; sooft um delta }
  $8C/$D8/ { MOV AX,DS ; wie asso angibt, }
  $01/$D0/ { ADD AX,DX ; ohne sonstige }
  $4B/ { DEC BX ; Memory-Zugriffe }
  $75/$F5/ { JNZ @inloop }
  $E2/$EF/ { LOOP @outloop }
  $1F) { POP DS }
end;

Procedure starttimer; { Systemtimer starten }
begin
port [$43]:=$34; port [$40]:=0; port [$40]:=0;
end;

Function ztime:real; { Systemtimer stoppen: Wert in s }
var a,b:word;
begin
 port [$43]:=0; a:=port[$40]; b:=port[$40];
 ztime:=($10000- a - (b shl 8))*tickertime;
end;

Function abstop:real; { messproc abstoppen}
begin
 messproc; { einmal zum Cache fllen }
 starttimer;
 messproc;
 abstop:=ztime;
end;

Function codemess(code:word):real; {stoppt zweibytigen Code ab}
var
    i:word;
    korrekt:real;
begin
@messproc:=@x0^;
x0^[1]:=$CBCB;
korrekt:= abstop;
for i:=1 to maxblocksize div 2 do
   x0^[i]:=swap(code);
   x1^[1]:=$CBCB;
codemess:=(abstop-korrekt)/maxblocksize*2;
end;

Function movemess (woher,wohin:byte):real;
var wert:real;
begin
move (X2^,X2^,bsize);
move (X0^,X0^,bsize); { 0 in Cache, 0 hit & dirty }
move (X1^,X1^,bsize); { 1 in Cache, 1 hit & dirty }
move (Xp^,X1^,bsize); { 1 raus, hit & clean }
                       {=> 0 hit & dirty }
                       {=> 1 miss /clean }
                       {=> Cachesize miss /dirty }
starttimer;
 move (x[woher]^,x[wohin]^,bsize);
wert:=ztime;
movemess:=wert;
end;

Procedure Get_cachesizes
   (var L1_cachesize,L2_cachesize:longint;var asso:byte);
var
    startadr:pointer;
    range:word;
    L1_found:byte;
    L2_found:byte;
    inloop:word;
    i:byte;

begin
 startadr:= ptr(cseg and $FFF, $50); { vermeidet Trashing mit Code }
 if (cseg > $2000) or (cseg < $1000) then
   startadr:=(ptr(seg(startadr^)+$1000,ofs(startadr^)));

L1_found:=0;
L2_found:=0;
asso:=1;
repeat
 bsize:=1;
 i:=0;
 repeat
 inloop:=asso shl 1;
 range :=bsize*inloop;
 starttimer;
 trash ($400,inloop,bsize,startadr);
 t[i]:=ztime;
 if (L1_found=0) then
   begin
   if (t[i]/t[0] > 1.1) then L1_found:=i;
   end
 else if (L2_found=0) and (t[i]/t[L1_found] > 1.4) then L2_found:=i;
 bsize := bsize shl 1;
 inc (i);
 until range >= $8000; { bis 512 KB }
 asso := asso shl 1;
until (asso > 8) or (L1_found > 0);
asso:=asso div 2;

if L1_found > 0 then L1_cachesize:=(longint($10) shl L1_found*asso)
                else L1_cachesize:=0;
if L2_found > 0 then L2_cachesize:=(longint($10) shl L2_found*asso)
                else
                begin
{!!!
 L1, L2-Zuordnung zu Prim„r/sekund„r-Cache bei h”chstens einem
 Cache etwas ge„ndert, um auch mit abgeschalteten Caches
 besser arbeiten zu k”nnen:
 L1- Cache > 16 KB wird dann immer als Sekund„r-Cache betrachet
 L2- Cache <=16 KB als Prim„r-Cache }

                if L1_cachesize > $4000 then { dann ist L2 der L1 }
                  begin
                  L2_Cachesize:=L1_cachesize;
                  L1_Cachesize:=0;
                  end
                  else L2_cachesize:=0;
 end;
end;

Procedure get_bussize(var bussize:byte; dif:word); { Bussize }
var i:integer;
    startadr:pointer;
const flen=$200;
begin
for i:=0 to 16 do
  begin
  startadr:=ptr($1000,cseg shl 4 +$50+i);
  trash(1,flen,dif,startadr); { L„uft im L1-Hit-Bereich }
  starttimer; { bestimmt daher die Busbreite}
  trash(10,flen,dif,startadr); { zwischen Prozessorkern und }
  load[i]:=ztime; { L1-Cache, also nicht unbedingt }
  bussize:=((i mod 16)+1) *8; { die Memory-Busbreite }
  if i > 0 then if load[i]/load[i-1] > 1.1 then exit;
  end;
end;

Procedure error(msg:string;errnr:word);
begin
writeln (chr(7),msg);
halt (errnr);
end;

const h=0; {hit}
      m=1; {miss}
      c=0; {clean}
      d=1; {dirty}

type zugriffsfeld=array[h..m,h..m,c..d,h..m,h..m,c..d] of real;
var z:zugriffsfeld;
    zlin:array[0..63] of real absolute z;

var i,j,k :word;
    rel, AAMtime, AADtime, Takt :real;
    prozessor :proztyp;
    coproz :coproztyp;
    asso,bussize,intbussize :byte;
    cacheanzahl :byte;

Procedure Showtime (xt:real);
begin
 Writeln (xt*1e6:6:0,' æs =>', bsize/xt/1e6:6:1, ' MByte/s');
end;

type messrec= record
     name:string[7];
     r,w:char;
     g:array[0..3] of real;
     end;

const messarray:array[0..63] of messrec =
(
(name:'hhc_hhc';r:'9';w:'9';g:( 0.4, 0.1, 0.2, 0.1)),
(name:'hhc_hhd';r:'9';w:'9';g:( 0.1, 0.0, 0.0, 0.0)),
(name:'hhc_hmc';r:'9';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hhc_hmd';r:'9';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hhc_mhc';r:'9';w:'9';g:( 0.4, 0.1, 0.2, 0.1)),
(name:'hhc_mhd';r:'9';w:'2';g:( 0.6, 0.4, 0.2, 0.1)),
(name:'hhc_mmc';r:'9';w:'8';g:( 1.7, 0.2, 1.9, 0.5)),
(name:'hhc_mmd';r:'9';w:'8';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hhd_hhc';r:'9';w:'9';g:( 0.2, 0.0, 0.1, 0.0)),
(name:'hhd_hhd';r:'9';w:'9';g:( 1.6, 2.4, 0.3, 0.5)),
(name:'hhd_hmc';r:'9';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hhd_hmd';r:'9';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hhd_mhc';r:'9';w:'9';g:( 0.7, 0.9, 0.2, 0.3)),
(name:'hhd_mhd';r:'9';w:'2';g:( 1.3, 4.4, 0.1, 0.7)),
(name:'hhd_mmc';r:'9';w:'8';g:( 3.6, 2.0, 1.7, 2.3)),
(name:'hhd_mmd';r:'9';w:'8';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_hhc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_hhd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_hmc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_hmd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_mhc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_mhd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_mmc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmc_mmd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_hhc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_hhd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_hmc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_hmd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_mhc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_mhd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_mmc';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'hmd_mmd';r:'x';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mhc_hhc';r:'0';w:'0';g:( 0.6, 0.2, 0.4, 0.2)),
(name:'mhc_hhd';r:'0';w:'0';g:( 0.5, 0.5, 0.1, 0.3)),
(name:'mhc_hmc';r:'0';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mhc_hmd';r:'0';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mhc_mhc';r:'0';w:'9';g:( 1.2, 0.9, 0.6, 0.9)),
(name:'mhc_mhd';r:'0';w:'2';g:( 2.5, 5.7, 0.7, 2.3)),
(name:'mhc_mmc';r:'0';w:'1';g:( 5.2, 2.0, 5.7, 5.2)),
(name:'mhc_mmd';r:'0';w:'1';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mhd_hhc';r:'0';w:'0';g:( 0.6, 0.3, 0.2, 0.2)),
(name:'mhd_hhd';r:'0';w:'0';g:( 2.9, 6.1, 0.6, 2.0)),
(name:'mhd_hmc';r:'0';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mhd_hmd';r:'0';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mhd_mhc';r:'0';w:'9';g:( 2.7, 5.8, 0.8, 2.4)),
(name:'mhd_mhd';r:'0';w:'2';g:( 6.3,31.6, 1.1, 6.8)),
(name:'mhd_mmc';r:'0';w:'1';g:(13.1,13.1, 8.3,15.9)),
(name:'mhd_mmd';r:'0';w:'1';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mmc_hhc';r:'1';w:'1';g:( 1.7, 0.2, 1.5, 0.5)),
(name:'mmc_hhd';r:'1';w:'1';g:( 1.3, 0.6, 0.6, 0.9)),
(name:'mmc_hmc';r:'1';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mmc_hmd';r:'1';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mmc_mhc';r:'1';w:'2';g:( 4.4, 1.3, 5.3, 4.4)),
(name:'mmc_mhd';r:'1';w:'2';g:( 9.8, 7.8, 7.1,14.0)),
(name:'mmc_mmc';r:'1';w:'8';g:(21.7, 3.1,51.4,29.5)),
(name:'mmc_mmd';r:'1';w:'8';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mmd_hhc';r:'8';w:'8';g:( 0.4, 0.0, 0.1, 0.1)),
(name:'mmd_hhd';r:'8';w:'8';g:( 0.5, 0.4, 0.1, 0.1)),
(name:'mmd_hmc';r:'8';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mmd_hmd';r:'8';w:'x';g:( 0.0, 0.0, 0.0, 0.0)),
(name:'mmd_mhc';r:'8';w:'9';g:( 1.8, 0.9, 1.0, 1.1)),
(name:'mmd_mhd';r:'8';w:'9';g:( 3.7, 6.5, 1.3, 2.7)),
(name:'mmd_mmc';r:'8';w:'1';g:( 8.6, 2.6, 8.0, 6.1)),
(name:'mmd_mmd';r:'8';w:'1';g:( 0.0, 0.0, 0.0, 0.0)));

function setblock(c:char):byte;
 begin
  case c of
  '0': setblock:=block0;
  '1': setblock:=block1;
  '2': setblock:=block2;
  '8': setblock:=block8;
  '9': setblock:=block9;
  end;
end;
var sum:array[0..3] of real;

begin
if odd(smsw) then error (' Virtuellen Modus mag ich nicht',2);
coproz:=coproztyp(test8087);
get_cachesizes (L1_cachesize, L2_cachesize, asso);
get_bussize(Intbussize,1); { Bussize fr Kern zu L1-Cache }
get_bussize(bussize,$200); { Bussize L1-Cache zu L2-Cache }

new (x[0]); x0:=x[0]; new (x[1]); x1:=x[1]; new (x[2]); x2:=x[2];
AAMtime:=codemess ($D40A);
AADtime:=codemess ($D50A);
rel:=AAMtime/AADtime;
Writeln;writeln;writeln;
Writeln ('PROZESSOR- und CACHEMESSER c''t 1/93/as V1.1':50);

Prozessor:=unbekannt;
Write ('Prozessor ',': ':10);
{ !!! ge„ndert, da Pentium Cachegr”áe fr Daten = 8K !!!}

 if (L1_Cachesize=$2000) and (asso=8) then Prozessor:=pentium { noch ungetestet !}
else
 if (L1_cachesize=$4000) and (asso=4) then Prozessor:=ibm486slc2 { " " }
else
 if rel < 1.1 then if coproz=i387 then prozessor:=i486DX {oder i487}
                                   else prozessor:=i486SX
   else if rel < 1.14 then
           if L1_cachesize=$2000
              then prozessor:=ibm386SLC
              else if bussize = 32 then prozessor:=i386DX
                                   else prozessor:=i386SX
    else if rel < 1.25 then prozessor:= i286
     else if rel <1.70 then prozessor:= i8088 { oder 8086}
       else if rel < 3 then prozessor:= V20 { oder V30 }
        else if L1_cachesize=$800 then prozessor:= Cyr486S2
         else
              { if L1_cachesize=$400 then }
              if bussize = 32 then prozessor:=Cyr486DLC
                              else prozessor:=Cyr486SLC;

     writeln (prozname[ord(prozessor)]);
if prozessor=unbekannt then
   begin
   Takt:=33e6;
   Writeln ('Takt nicht bestimmbar, 33 MHz angenommen');
   end
   else
   begin
   Takt:=aamtab[ord(prozessor)]/aamtime;
   Writeln ('Takt',': ':17,Takt/1e6:4:1,' MHz');
   end;
Write ('interne Busbreite : ', intbussize,' Bit');
Writeln (' zwischen Prozessor und prim„ren Cache');
 Write ('externe Busbreite : ', bussize,' Bit');
 Writeln (' zwischen Prim„r- und Sekund„r-Cache oder Memory');
write ('Coprozessor',':':9);
if prozessor in [i486DX,i486DX2,i487DX,Pentium] then inc (coproz);
case coproz of nix : writeln (' nicht vorhanden oder nicht erkannt');
               i87 : Writeln (' 8087-Typ');
               i287 : Writeln (' 80287-Typ');
               i387 : Writeln (' 80387-Typ');
               i487 : Writeln (' i487-Typ');
               end;
cacheanzahl:=0;
Write ('Prim„r-Cache',': ':9);
If (L1_cachesize=0)
   then writeln ('nicht vorhanden oder nicht aktiv')
   else
   begin
   inc (cacheanzahl);
   Write (L1_cachesize shr 10, ' KByte ');
   if (asso = 1) then Writeln (' direct mapped')
                 else Writeln (asso, '-fach assoziativ');
   end;

Write ('Sekund„r-Cache',': ':7);
  if (L2_Cachesize=0)
    then Write ('nicht vorhanden oder nicht aktiv')
    else
    begin
     Write ((L2_cachesize shr 10), ' KByte ');
     inc (cacheanzahl);
    end;
  if (L1_Cachesize = 0) and (L2_Cachesize > 0) then
  begin
  if (asso = 1) then Write (' direct mapped')
                else Write (asso, '-fach assoziativ');
  end;
 Writeln;
 bsize:= L1_Cachesize;
 if (bsize > $2000) or (bsize = 0) then Bsize:=$2000; { maximal 8K }
 cachesize:=L2_Cachesize div $2000; { ext. Cachegr”áe in 8K-Bl”cken}
 If cachesize=0 then Cachesize:=8; { kein ext Cache da, dann 64K }

if maxavail < longint(cachesize)*$2000 then error
   (' Heap zu klein bzw. Cache zu groá fr weitere Messungen',2);
for i:=3 to cachesize +2 do new (x[i]);
block8:=cachesize;
block9:=block8+1;
XP:=x[block9];
{ alle MOV-Messungen ausfhren; }
for i:=0 to 4 do sum[i]:=0;
for i:=0 to 63 do with messarray[i] do
  if (r <> 'x') and (w <>'x') then
  begin
  zlin[i]:= movemess (setblock(r),setblock(w));
  for j:=0 to 3 do sum[j]:=sum[j]+zlin[i]*g[j]/100;
  end;
if (cacheanzahl > 0) then
begin
writeback:=z[m,m,d,m,m,c]/z[m,m,d,m,h,c]> 1.1;
dirtyok := (z[m,m,d,m,h,d]/z[m,m,c,m,h,d]) > 1.1;
if Writeback then writeln ('Write-Strategie : Write Back')
             else writeln ('Write-Strategie : Write Through');
if writeback then
  begin
  if dirtyok then Writeln ('Dirty Tag',': ':12,'ok')
             else Writeln
  (chr(7),'Dirty Tag',': ':8, '!!!!! Dirty Tag nicht ok !!!!!');
  Writeln ('Extra Dirty-Waits :',
     abs(z[m,h,d,m,h,c]-z[m,h,d,m,h,d])/bsize/takt:2:0);
  end;
Writeln;
end;
Writeln ('Datenfluá- und Bus-Performance':40);
{ !!! Angaben fr Cyrix ge„ndert auf 1 oder 2-KB-Bl”cke !!!}
Write ('Beste Zeit fr ',bsize div $400,'K MOVSB (Cache /Page Hits)',':':4);
showtime(z[h,h,c,h,h,c]);
Write ('mittlere " fr ',bsize div $400,'K MOVSB (Miss + Hit)',':':10);
showtime (z[m,h,d,m,h,d]);
Write ('mittlere " fr ',bsize div $400,'K MOVSB (bei clean) ',':':10);
showtime (z[m,m,c,m,h,d]);

Write ('mittlere " fr ',bsize div $400,'K MOVSB (bei dirty) ',':':10);
showtime (z[m,m,d,m,h,d]);

Write ('Schlechteste " ',bsize div $400,'K MOVSB (Cache misses)',':':8);
showtime (z[m,m,d,m,m,c]);
Writeln;
for i:=0 to 3 do
 begin
  if L2_cachesize > 0 then
   case i of
    0 : Write ('im Mittel bei 64 KB L2-Cache /DOS (640K):');
    1 : Write ('im Mittel bei 256 KB L2-Cache /DOS (640K):');
    2 : Write ('im Mittel bei 64 KB L2-Cache /Windows (4MB) :');
    3 : Write ('im Mittel bei 256 KB L2-Cache /Windows (4MB) :');
   end
   else
    case i of
    0 : begin
        Write ('im Mittel unter DOS (640K):');
        inc(i);
        end;
    2 : begin
        Write ('im Mittel unter Windows (4MB) :');
        inc (i);
        end;
   end;

 showtime (sum[i]);
end;
end.
{ Memory to memory:
  ALR 486/33 : 10.3;8.4;7.6;4.2;3.7;6.7;7.5;6.5;6.7
  NoN 486/33 : 7.8;7.0;5.2;5.2;5.2;6.0;6.5;5.6;5.8 }

=========================================================================