unit U_TicTacToeMachine;
{Copyright 2002, Gary Darby, Intellitech Systems Inc., www.DelphiForFun.org

 This program may be used or modified for any non-commercial purpose
 so long as this original notice remains in place.
 All other rights are reserved
 }

{
An artificial intelligence demonstration of a
"machine" that learns to play tic-tac-toe by trial
and error.    This is a computer model of the
original machine, MENACE,  invented by
Donald Michie in 1961 using 300 matchboxes
representing 300 board positions.  Each box
contains colored beads for each available cell.

The machine always plays first.  It plays by
selecting a bead randomly from the box
representing the current board configurations.
When it wins or draws it is "rewarded" by
adding beads of the winning move colors  to
each box used.  Losses are punished by
confiscating the selected beads.

You may click avalaible cells to play against the
machine or use the "Random" button to train
the machine.  It may take a few thousand
random games to train it well.
}

interface

uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
  StdCtrls, Spin, ExtCtrls;

type
TweightArray=array[1..9] of integer;
  PWeightrec=^TWeightrec;
  TWeightrec=record  {a record saved with each board position containing more
                      info about the position}
w:TWeightArray;{array of weights (# of beads of each color) for possible moves}
movelevel:integer; {which level are we at}
lastchoice:byte;   {the cell filled to get to this position}
end;


  TForm1 = class(TForm)
    ProtoEdt: TEdit;
    ResetBtn: TButton;
    ListBox1: TListBox;
    ListBox2: TListBox;
    Label1: TLabel;
    StartBtn: TButton;
    Run100Btn: TButton;
    ResetWeightsBtn: TButton;
    GameCount: TSpinEdit;
    Label2: TLabel;
    Label3: TLabel;
    DebugPanel: TPanel;
    Label5: TLabel;
    Label4: TLabel;
    ListBox3: TListBox;
    DebugBtn: TButton;
    Memo1: TMemo;
procedure FormActivate(Sender: TObject);
procedure ResetBtnClick(Sender: TObject);
procedure StartBtnClick(Sender: TObject);
procedure Run100BtnClick(Sender: TObject);
procedure ResetWeightsBtnClick(Sender: TObject);
procedure DebugBtnClick(Sender: TObject);
public
board:array[0..8] of TEdit; {array of TEdits visualizing the board}
level:integer;    {current level}
winlevel:integer;
    xwins, Owins, totgames: integer;  {statistics}
strkey:string; {9 character string representation of board}
Positions:TStringList; {list of 304 "matchboxes" representing possible positions}
gameover:boolean;
    autoplay:boolean;
    moves:array[1..4] of integer; {positions list index values for current game}
nbrmoves:integer; {how many moves in current game (X moves only)}
procedure BoardKeyClick(Sender: TObject); {Human move maker}
{procedure BoardToKey;}
procedure GenNext(key:string;templevel:integer); {Initialize positions list}
function FindTransform(key:String;  {loojk for key in the list, allowing
                                         rotations and reflections}
var TransformNbr, listposition:integer):boolean;
function transform(s:string;t:integer):string; {get a new key after
                                                    transform "t"}
function inversetransform(s:string;t:integer):string; {reverse a transform}
procedure makemove; {machine move maker}
function winner(p:char):boolean; {check for 3 in a row for player "p"}
procedure wrapup(p:char); {end of game total and adjust weights}
procedure initweights; {initialize weights}
end;

var Form1: TForm1;

implementation

{$R *.DFM}

Uses UMakeCaption;

var
transforms:array[0..7] of array[1..9] of integer =  {the 8 transforms}
{identity}   ((1,2,3,4,5,6,7,8,9),   {"moveto" index values}
{right90}     (7,4,1,8,5,2,9,6,3),
{left90}      (3,6,9,2,5,8,1,4,7),
{180}         (9,8,7,6,5,4,3,2,1),
{Mirror}      (7,8,9,4,5,6,1,2,3),
{Mirror+Left} (9,6,3,8,5,2,7,4,1),
{Mirror+Right}(1,4,7,2,5,8,3,6,9),
{Mirror+180}  (3,2,1,6,5,4,9,8,7)
     );

const
{initial bead counts of each availablr cell color for machines 1st 4 moves}
initialweights:array[0..3] of integer=(4,3,2,1);

{**************** FormActivate **************}
procedure TForm1.FormActivate(Sender:TObject);
var
i:integer;
  fname:string;
  c1,c3,c5,c7:integer;
begin
makecaption('Tic-Tac-Toe Machine', #169+' 2002 G Darby, www.delphiforfun.org',self);
  randomize;
  level:=1;
{create 9 Tedits as the board array}
for i:=0 to 8 do
begin
board[i]:=TEdit.create(self);
with board[i] do
begin
parent:=self;
      autosize:=protoedt.autosize;
      width:=protoedt.width;
      height:=protoedt.height;
{move left 0,1, or 2 times the width of prototype,
       subtract 0, 1 or 2 pixels to make interior borders overlap}
left:= protoedt.left+(i mod 3)*protoedt.width- i mod 3;
{move down 0,1, or 2 times the height of prototype,
       subtract 0, 1 or 2 pixels to make interior borders overlap}
top:= protoedt.top+(i div 3)*protoedt.height- i div 3;
      text:=' ';
      tag:=i+1;  {index of this cell's position in the key "strkey"}
font.size:=protoedt.Font.size;
      font.style:=protoedt.font.style;
      borderstyle:=protoedt.borderstyle;
      onclick:=BoardkeyClick;
      readonly:=true;
end;
end;
{generate all matchboxes for 1st mover, "X"}
Positions:=TStringList.create;
  positions.sorted:=true;
  strkey:=stringofchar('-',9);
  fname:=extractfilepath(application.exename)+'Positions.str';
{load positions if file exists}
if fileexists(fname) then positions.loadfromfile(fname)
else
begin
Gennext(strkey,0);  {recursive procedure that builds the list
                         of "matchboxes" (positions)}
positions.savetofile(fname);
end;
  Initweights; {Add the "beads" to the "matchboxes"}
{accumulate # of moves for each level for diplay}
c1:=0; c3:=0; c5:=0; c7:=0;
for i:=0 to positions.count-1 do
begin
with pweightrec(positions.objects[i])^ do
case movelevel of
1: inc(c1);
    3: inc(c3);
    5: inc(c5);
    7: inc(c7);
end;
end;
  listbox2.items.add('There are '+ inttostr(c1) + ' level 1 positions');
  listbox2.items.add('There are '+ inttostr(c3) + ' level 3 positions');
  listbox2.items.add('There are '+ inttostr(c5) + ' level 5 positions');
  listbox2.items.add('There are '+ inttostr(c7) + ' level 7 positions');
end;

{******************* Initweights *************}
procedure TForm1.initweights;
var
i,j,v,n:integer;
    initialcount:integer;
    s, wstr:string;
    P:Pweightrec;
begin
for i:= 0 to positions.count-1 do
with positions do
begin
s:=strings[i];
      n:=0;
{count the "O"s to determine level}
for j:= 1 to 9 do  if s[j]='O' then inc(n);
      initialcount:=initialweights[n];
      new(P); {make a new weightrec}
objects[i]:=pointer(p);
      wstr:='';
with Pweightrec(objects[i])^ do
begin
for j:=1 to 9 do {Assign the initial bead count to each empty slot}
begin
if s[j]='-' then v:=initialcount else v:=0;
          w[j]:=v;
          wstr:=wstr+inttostr(v)+',';
end;
        lastchoice:=0; {initialize lastchoice variable}
movelevel:=2*n+1;
end;
      listbox1.items.add(strings[i]+'('+wstr+')');
end;
end;{initweights}

{**************** Transform ****************}
function TForm1.transform(s:string;t:integer):string;
{given a key and a transform, return the new key, rotated and/or mirrored}
var i:integer;
begin
result:=s;
for i:=1 to 9 do
result[i]:=s[transforms[t,i]];
end;

{**************** InverseTransform ****************}
function TForm1.InverseTransform(s:string;t:integer):string;
{Reverse the previous transform t, return the new key}
var i:integer;
begin
case t of
1: {right90} t:=2; {left 90}
2: {left90}  t:=1; {right 90}
end; {all the other transformations are reflexive, i.e. T(T(s))=s}
result:=s;
for i:=1 to 9 do
result[i]:=s[transforms[t,i]];
end;

{************** Winner *************}
function TForm1.winner(p:char):boolean;
{Check for three in a row}
var s:string;
begin
s:=strkey;
if (  (s[1]=p) and (s[2]=p) and (s[3]=p) )
or ((s[4]=p) and (s[5]=p) and (s[6]=p) )
or ((s[7]=p) and (s[8]=p) and (s[9]=p) )
or ((s[1]=p) and (s[4]=p) and (s[7]=p) )
or ((s[2]=p) and (s[5]=p) and (s[8]=p) )
or ((s[3]=p) and (s[6]=p) and (s[9]=p) )
or ((s[1]=p) and (s[5]=p) and (s[9]=p) )
or ((s[3]=p) and (s[5]=p) and (s[7]=p) )
then result:=true  else result:=false;
end;

{**************** Wrapup *************}
procedure Tform1.wrapup(p:char);
{game over - adjust weight and reset board image for next game}
var
i,reward, rewardlast:integer;
  msg:string;
begin
case p of
'X':
begin
msg:='X wins!';
      inc(xwins);
      reward:=+4;
      rewardlast:=1000;  {big reward at the last level for winning}
end;
'O':
begin
msg:='O wins';
      inc(OWins);
      reward:=-1;
      rewardlast:=-1000; {big punishment for the losing move}
end;
else
begin
msg:='A draw!';
     reward:=+1;
     rewardlast:=+1;
end;
end;
If not autoplay then showmessage(Msg);
  inc(totgames);

for i:=1 to nbrmoves do
with PWeightrec(positions.objects[moves[i]])^ do
begin
if i<nbrmoves then w[lastchoice]:=w[lastchoice]+reward
else w[lastchoice]:=w[lastchoice]+rewardlast;
if w[lastchoice]<0 then w[lastchoice]:=0;  {don't let weights go negative}
lastchoice:=0;
end;
  listbox2.clear;
  listbox2.items.add(inttostr(totgames)+ ' games played');
  listbox2.items.add(inttostr(Xwins)+ ' won by machine');
  listbox2.items.add(inttostr(Owins)+ ' won by opponent');
  listbox2.items.add(inttostr(totgames-XWins-Owins)+ ' draws');
  gameover:=true;
end;


{************* Makemove ************}
procedure TForm1.makemove;
{Computer makes a move}
var i,n, tnbr,sum:integer;
     listposition:integer;
     newkey, wstr:string;
     prob:single;
begin
{
    1. Find the current board position in the list, (transform if necessary),
    2. Select a random move based on weight values for next moves that
       are contained with each list entry.
    3. Update lastchoice value in the list entry and make the move
    }
if level=9 then {fill in the last position}
begin
for i:=1 to 9 do if strkey[i]='-' then strkey[i]:='X';
end
else
begin  {Look for board configuration in the positions list}
If not findtransform(strkey, tnbr, listposition) then
begin
showmessage('System error - position '+strkey +' not found in table');
       i:=random(8);
if i>0 then strkey:=transform(strkey,random(8));
end
else
with Pweightrec(positions.objects[listposition])^ do
begin
newkey:=transform(strkey,tnbr);
       sum:=0;
for i:=1 to 9 do
begin
sum:=sum+w[i];
         wstr:=wstr+inttostr(w[i])+',';
end;
if sum>0 then
begin
n:=random(sum)+1;  { get a random value of the sum of beads}
i:=0;
         sum:=0;
while  sum<n do {and count beads up until we exceed the value}
begin
inc(i);
if i>9
then showmessage('Weights error');
           sum:=sum+w[i];
end;
         prob:=w[i]/sum; {probability for display}
end
else
begin
{Zero weights?  just move to 1st available cell}
prob:=1/(10-movelevel);
for i:= 1 to 9 do if strkey[i]='-' then break;
end;
       listbox3.items.add('Transform('+strkey+','+inttostr(tnbr)+') = '+newkey);
       listbox3.items.add('    Weights:'+wstr);

       listbox3.items.add('    Selected cell:'+inttostr(i)+ '( probability '+
                         format('%4.2f',[prob]) +')');
       newkey[i]:='X';
       lastchoice:=i;
if tnbr>0 then strkey:=inverseTransform(newkey,tnbr)
else strkey:=newkey;
       listbox3.items.add('    InvTransform('+newkey+','+inttostr(tnbr)+') = '+strkey);
       inc(nbrmoves);
       moves[nbrmoves]:=listposition;
end;
end;
for i:=1 to length(strkey) do {fill in the visual board}
if strkey[i]<>'-' then board[i-1].text:=strkey[i]
else board[i-1].text:=' ';
if winner('X') then  {check if we won}
begin
winlevel:=level;
     wrapup('X');
end
else if level=9 then wrapup('-')
else inc(level);
end;  {makemove}

{**************** GenNext **************}
procedure TForm1.GenNext(key:string;templevel:integer);
{Generates the 304 "Matchboxes" that make up the machine,
 normally one time only since psitions are saved in a file}

procedure posadd(newkey:string);  {add non-winning positions to the list}
begin
strkey:=newkey;
{no need to put winning positions in the list}
if (not winner('O')) and (not winner('X')) then positions.add(newkey);
end;

var
ch:char;
  newkey:string;
  i,index, listpos:integer;
begin
if templevel mod 2=1 then ch:='X' else if templevel>1 then ch:='O' else ch:='-';
for i :=1 to length(key) do
begin
if key[i]='-' then
begin
newkey:=key;
      newkey[i]:=ch;
if templevel mod 2=0 then {save positions after "O" moves if we haven't already}
begin
index:=positions.indexof(newkey);
if index<0 then {not found}
begin
if not FindTransform(newkey, index,listpos) {and if not saved rotated }
then  posadd(newkey);         {or mirrored version, then add it}
end;
end;
if templevel<6 then GenNext(newkey,templevel+1); {generate next level, up to 6}
end;
if templevel=0 then break;
end;
end;

{****************** FindTransorm ************}
function TForm1.FindTransform(key:String;
var transformNbr, listposition:integer):boolean;
{try all unique rotations & reflections looking for match already in list}
var
i,j:integer;
  tempkey:string;
begin
tempkey:=key;
for i:=0 to 7 do
begin
for j:=1 to 9 do tempkey[j]:=key[transforms[i,j]];
{debug - listbox2.items.add(tempkey);}
result:=positions.find(tempkey,listposition);
if result then
begin
transformNbr:=i;
      break;
end;
end;
end;


{***************** BoardKeyClick ************}
procedure TForm1.BoardKeyClick(Sender:TObject);
{Accept (or reject) a click on the board}
begin
with tEdit(sender) do
begin
if text=' ' then
begin
if level mod 2 =1
then showmessage('Machine moves first, click Start button to begin a game')
else text:='O';
      strkey[tag]:='O';  {update the key}
if winner('O') then wrapup('O')
else
begin
inc(level);
        makemove;
end;
end
else messagebeep(MB_ICONEXCLAMATION);
end;
end;


{********** ResetBtnClick ****************}
procedure TForm1.ResetBtnClick(Sender: TObject);
{Reset games won/lost counters}
begin
totgames:=0;
  XWins:=0;
  OWins:=0;
end;

{************** StartBtnClick ***********}
procedure TForm1.StartBtnClick(Sender: TObject);
{Start a new gane}
var i:integer;
begin
level:=1;
  gameover:=false;
  autoplay:=false;
  nbrmoves:=0;
for i:= 0 to 8 do board[i].text:=' ';
  strkey:='---------';
  listbox3.Clear;
  makemove;
end;

{************* Run100BtnClick ************}
procedure TForm1.Run100BtnClick(Sender: TObject);
{run a bunch of random games}
var
i,n:integer;
begin
gameover:=true;  {to force 1st startbtnclick}
startbtnclick(sender);
  autoplay:=true;
for i:= 1 to gamecount.value do
begin
repeat
repeat
n:=random(9);
until strkey[n+1]='-';
      boardkeyclick(board[n]);
      application.processmessages;
until gameover;
If i<gamecount.value then startbtnclick(sender);
    autoplay:=true;
end;
end;

{******************* ResetWeightBtnClick ***********}
procedure TForm1.ResetWeightsBtnClick(Sender: TObject);
begin  initweights; end;

procedure TForm1.DebugBtnClick(Sender: TObject);
begin
debugpanel.visible:= not debugpanel.visible;
end;

end.