--
-- Don Golding
-- Angelus Research Corp.
-- dgolding@angelusresearch.com
-- Version 3
--
--                       Forth Processor Design
--
-- This code represents my current thoughts on designing a Forth Processor in VHDL.
-- Please review it and email me with your input on either Forth design issues or
-- VHDL design issues.
--
-- The goal is to build a generic Forth processor that can be included in VHDL designs.
-- If it could fit into a Xilinx 4005 or 4010 it would be ideal!
--
-- Forth is really a virtual microprocessor implemented on other various processors,
-- from the 68HC11 to VAX machines and supercomputers.  You will currently find Forth used
-- as the driver for PCI hardware in high end Macintoshes and Sun Workstations.
--
-- This is an attempt to create a real Forth Processor on an FPGA or ASIC using VHDL.
-- Previous real Forth Microprocessors include: Harris RTX2000, SHABOOM, F21, etc.
-- The current attempts (F21, etc.) are trying to make 500 MIPS screamers.
-- There are also people like Dr. Ting using the Schematic editor to create Forth
-- processors.  I wonder how a Schematic designed Forth processor will compare to a VHDL
-- based design in speed and the number of gates used.
--
-- I think a straightforward simple design will have considerable applications
-- when you need a processor included in your FPGA/ASIC design.
--
-- FPGAs operate at 200 MHz.  I don't know how fast this design will be, but its speed
-- should be limited to the external RAM speed when memory access is required.
-- Internal register to register operations should be in the 50-200 MHz range.
--
-- The preliminary specifications are:
--
--  16 bit data bus (to save space, could be 8 bit but it would take more statements)
--  16 bit address bus
--  by editing the code in the Entity declarations, you implement 32, 64, ? designs
--
--  Return Stack levels=16
--  Data Stack levels=16 (could be smaller, 4 items could be ok)
--  Output port A is 8 lines
--  Output port B is 8 lines
--  Motorola SPI compatible port (SPI_In,SPI_Out,SPI_Ck,SS/)
--
--  By editing the code in the Entity declarations, you can add serial ports, parallel
--  ports, adc's or just about anything you
--  can imagine.
--

library IEEE;
use IEEE.std_logic_1164.all;

-- Proc: external interface of the Forth processor.
--
-- The processor talks to an external memory through DataBus / AddressBus / rd
-- and exposes two 8 line output ports, two single output lines and an SPI
-- style serial port.  Widths are fixed here; widening the design means
-- editing these declarations.
entity Proc is
    port (
        DataBus    : inout STD_LOGIC_VECTOR (15 downto 0);  -- 16 bit bidirectional data bus
        OutPortA   : out   STD_LOGIC_VECTOR (7 downto 0);   -- output port A, 8 lines
        OutPortB   : out   STD_LOGIC_VECTOR (7 downto 0);   -- output port B, 8 lines
        OutputA    : out   STD_LOGIC;                       -- single output line A
        OutputB    : out   STD_LOGIC;                       -- single output line B
        AddressBus : out   STD_LOGIC_VECTOR (15 downto 0);  -- 16 bit address bus
        Reset      : in    STD_LOGIC;                       -- the architecture resets while this is '0'
        SPI_In     : in    STD_LOGIC;                       -- SPI serial data in
        SPI_Out    : out   STD_LOGIC;                       -- SPI serial data out
        SS         : in    STD_LOGIC;                       -- SPI slave select (listed as SS/ in the header; presumably active low)
        SPI_Ck     : in    STD_LOGIC;                       -- SPI clock
        clock      : in    STD_LOGIC;                       -- processor clock
        rd         : out   STD_LOGIC                        -- memory read/write line: '1' = read, '0' = write
    );
end Proc;

architecture Proc_arch of Proc
is
    use IEEE.numeric_std.all;  -- signed/unsigned arithmetic on the 16 bit words

    ---------------------------------------------------------------------------
    -- Op codes, only 25 so far...
    --
    -- An instruction word read from DataBus selects the op code whose position
    -- in this list equals the numeric value of the word (op_abort = 0,
    -- op_depth = 1, ...).  Any other value is an invalid instruction.
    -- Forth names that are not legal VHDL identifiers are spelled out:
    --   >r = op_to_r, r> = op_r_from, r@ = op_r_fetch, +! = op_plus_store,
    --   0branch = op_zero_branch.
    -- op_greater is kept so the numbering of the later op codes is unchanged;
    -- it behaves exactly like op_greater_than.
    ---------------------------------------------------------------------------
    type op_code is (
        op_abort, op_depth, op_dup, op_pick, op_over, op_swap,
        op_to_r, op_r_from, op_r_fetch, op_drop,
        op_rot, op_equal, op_zero_equal, op_greater, op_greater_than,
        op_less_than, op_store, op_plus_store, op_fetch, op_plus, op_minus, op_times,
        op_divide, op_branch, op_zero_branch
    );

    constant stack_depth : integer := 16;                               -- 16 items max

    subtype data_word      is STD_LOGIC_VECTOR (15 downto 0);           -- 16 bit wide
    type    stack_array    is array (stack_depth - 1 downto 0) of data_word;  -- 16 bits wide, 16 deep
    subtype stack_pointer  is integer range 0 to stack_depth;           -- number of items on a stack
    subtype memory_pointer is unsigned (15 downto 0);                   -- 64K cells max

    constant dstack_start : integer := 0;

    -- Levels of the rd line.
    constant write : STD_LOGIC := '0';
    constant read  : STD_LOGIC := '1';

    -- Flag values produced by the comparison op codes.  The value 1 is used for
    -- "true"; switch forth_true to X"FFFF" if all-bits-set flags are wanted.
    constant forth_false : data_word := X"0000";
    constant forth_true  : data_word := X"0001";

    -- Error codes are defined here.
    constant no_error            : integer := 0;
    constant dstack_overflow     : integer := 1;
    constant dstack_underflow    : integer := 2;
    constant rstack_overflow     : integer := 3;
    constant rstack_underflow    : integer := 4;
    constant invalid_instruction : integer := 5;
    constant divide_by_zero      : integer := 6;
    subtype  error_code is integer range no_error to divide_by_zero;

    -- Bus cycle the processor is in.  Every state lasts one clock period and
    -- relies on the external memory answering within that period.
    type cycle_state is (
        st_execute,     -- DataBus holds the op code addressed by mp
        st_fetch_data,  -- DataBus holds the cell requested by op_fetch
        st_plus_store,  -- DataBus holds the cell op_plus_store must increment
        st_write_done,  -- a write was strobed during the last period; release the bus
        st_branch       -- DataBus holds the branch target that follows the op code
    );

    -- Turns a VHDL boolean into a Forth flag word.
    function to_flag (condition : boolean) return data_word is
    begin
        if condition then
            return forth_true;
        end if;
        return forth_false;
    end to_flag;

begin

    -- Ports that no op code uses yet are parked at '0' so they are never left
    -- undriven.
    OutPortA <= (others => '0');
    OutPortB <= (others => '0');
    OutputA  <= '0';
    OutputB  <= '0';
    SPI_Out  <= '0';

    -- NOTE: there is no separate "synch" process.  'clock' is an input port and
    -- cannot be driven from inside the architecture, so all sequencing is done
    -- by the single clocked process below.

    ---------------------------------------------------------------------------
    -- code: fetches one instruction word per st_execute cycle and executes it.
    --
    -- All processor state lives in the variables of this process, and the
    -- stack primitives are declared here so they can reach that state.
    -- Reset is asynchronous and active while '0'.
    ---------------------------------------------------------------------------
    code: process (clock, Reset)

        -- Each stack is really 16 registers!  Stack operations should be real fast!
        -- (A circular queue would be an alternative to the overflow/underflow checks.)
        variable data_stack   : stack_array;
        variable return_stack : stack_array;
        variable dp : stack_pointer  := dstack_start;     -- data stack pointer: next free slot
        variable rp : stack_pointer  := 0;                -- return stack pointer: next free slot
        variable mp : memory_pointer := (others => '0');  -- memory pointer: address of current instruction

        variable temp1      : data_word  := (others => '0');  -- reg:Temp1 internal (operand of +!)
        variable error      : error_code := no_error;         -- reg:Error code of the last fault
        variable successful : boolean    := false;            -- Flag:operation successful

        variable state     : cycle_state := st_execute;
        variable restarted : boolean     := false;            -- reset_proc ran during this clock
        variable mem_addr  : data_word   := (others => '0');  -- address of a data (non instruction) access
        variable mem_data  : data_word   := (others => '0');  -- word to write
        variable mem_write : boolean     := false;            -- strobe a write during the next period

        -- Empties both stacks and restarts execution at address 0.
        -- Deliberately leaves 'error' alone so the cause of a fault survives.
        procedure reset_proc is
        begin
            dp        := dstack_start;
            rp        := 0;
            mp        := (others => '0');
            state     := st_execute;
            mem_write := false;
            restarted := true;
        end reset_proc;

        -- Records an error code and restarts the processor.
        procedure fault (code : in error_code) is
        begin
            error := code;
            reset_proc;
        end fault;

        -- Forth stack manipulation primitives.
        -- dp/rp point to the next free element, not the current one, after an
        -- operation has completed; the top item is at index pointer-1.
        -- Once the processor has restarted in this clock they do nothing, so a
        -- faulting instruction cannot disturb the freshly reset stacks.

        -- Pushes value on the data stack; faults with dstack_overflow when full.
        procedure push_dp_stack (value : in data_word) is
        begin
            if restarted then
                null;
            elsif dp = stack_depth then
                fault(dstack_overflow);
            else
                data_stack(dp) := value;
                dp := dp + 1;
            end if;
        end push_dp_stack;

        -- Pops the top of the data stack; faults with dstack_underflow when empty.
        procedure pop_dp_stack (value : out data_word) is
        begin
            value := (others => '0');
            if restarted then
                null;
            elsif dp = dstack_start then
                fault(dstack_underflow);
            else
                dp := dp - 1;
                value := data_stack(dp);
            end if;
        end pop_dp_stack;

        -- Pushes value on the return stack; faults with rstack_overflow when full.
        procedure push_rp_stack (value : in data_word) is
        begin
            if restarted then
                null;
            elsif rp = stack_depth then
                fault(rstack_overflow);
            else
                return_stack(rp) := value;
                rp := rp + 1;
            end if;
        end push_rp_stack;

        -- Pops the top of the return stack; faults with rstack_underflow when empty.
        procedure pop_rp_stack (value : out data_word) is
        begin
            value := (others => '0');
            if restarted then
                null;
            elsif rp = 0 then
                fault(rstack_underflow);
            else
                rp := rp - 1;
                value := return_stack(rp);
            end if;
        end pop_rp_stack;

        -- Forth's inner interpreter (next): step to the following instruction,
        -- unless the processor restarted during this clock.
        procedure next_instruction is
        begin
            if not restarted then
                mp    := mp + 1;
                state := st_execute;
            end if;
        end next_instruction;

        -- Decodes the instruction word on DataBus and executes it.
        -- ok is false when the word is not an op code.  Op codes that need an
        -- extra memory cycle set 'state' (and mem_addr/mem_data/mem_write)
        -- instead of finishing here.
        procedure proc_code (ok : out boolean) is
            variable index   : natural;
            variable op      : op_code;
            variable a, b, c : data_word;
            variable product : unsigned (31 downto 0);
        begin
            ok := false;
            if Is_X(DataBus) then           -- undefined bus value: not an op code
                return;
            end if;
            index := to_integer(unsigned(DataBus));
            if index > op_code'pos(op_code'high) then
                return;
            end if;
            ok := true;
            op := op_code'val(index);

            case op is

                when op_abort =>            -- reset processor
                    reset_proc;

                when op_depth =>            -- ( -- n ) put the depth of the stack on the top
                    push_dp_stack(std_logic_vector(to_unsigned(dp, 16)));

                when op_dup =>              -- ( x -- x x ) duplicate the top item
                    pop_dp_stack(a);
                    push_dp_stack(a);
                    push_dp_stack(a);

                when op_pick =>             -- ( xu .. x0 u -- xu .. x0 xu ) copy the u-th item
                    pop_dp_stack(a);
                    if not restarted then
                        if to_integer(unsigned(a)) >= dp then
                            fault(dstack_underflow);
                        else
                            push_dp_stack(data_stack(dp - 1 - to_integer(unsigned(a))));
                        end if;
                    end if;

                when op_over =>             -- ( a b -- a b a ) duplicate the second item
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(a);
                    push_dp_stack(b);
                    push_dp_stack(a);

                when op_swap =>             -- ( a b -- b a ) swap top two items
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(b);
                    push_dp_stack(a);

                when op_to_r =>             -- ( x -- ) ( R: -- x ) move top of data stack to return stack
                    pop_dp_stack(a);
                    push_rp_stack(a);

                when op_r_from =>           -- ( -- x ) ( R: x -- ) move top of return stack to data stack
                    pop_rp_stack(a);
                    push_dp_stack(a);

                when op_r_fetch =>          -- ( -- x ) ( R: x -- x ) copy top of return stack to data stack
                    pop_rp_stack(a);
                    push_rp_stack(a);
                    push_dp_stack(a);

                when op_drop =>             -- ( x -- ) drop top item
                    pop_dp_stack(a);

                when op_rot =>              -- ( a b c -- b c a ) rotate 3rd item to the top
                    pop_dp_stack(c);
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(b);
                    push_dp_stack(c);
                    push_dp_stack(a);

                when op_equal =>            -- ( a b -- flag ) true if a = b
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(to_flag(a = b));

                when op_zero_equal =>       -- ( x -- flag ) true if x = 0
                    pop_dp_stack(a);
                    push_dp_stack(to_flag(unsigned(a) = 0));

                when op_greater | op_greater_than =>  -- ( a b -- flag ) true if a > b (signed)
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(to_flag(signed(a) > signed(b)));

                when op_less_than =>        -- ( a b -- flag ) true if a < b (signed)
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(to_flag(signed(a) < signed(b)));

                when op_store =>            -- ( x addr -- ) store 16 bit value to memory
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    if not restarted then
                        mem_addr  := b;
                        mem_data  := a;
                        mem_write := true;
                        state     := st_write_done;
                    end if;

                when op_plus_store =>       -- ( n addr -- ) add n to the 16 bit value in memory
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    if not restarted then
                        mem_addr := b;
                        temp1    := a;
                        state    := st_plus_store;  -- read the cell first, write it back after
                    end if;

                when op_fetch =>            -- ( addr -- x ) get 16 bit value from memory
                    pop_dp_stack(a);
                    if not restarted then
                        mem_addr := a;
                        state    := st_fetch_data;
                    end if;

                when op_plus =>             -- ( a b -- a+b ) add two 16 bit numbers
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(std_logic_vector(signed(a) + signed(b)));

                when op_minus =>            -- ( a b -- a-b ) subtract two 16 bit numbers
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    push_dp_stack(std_logic_vector(signed(a) - signed(b)));

                when op_times =>            -- ( a b -- a*b ) multiply, keeping the low 16 bits
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    -- the low half of the product is the same for signed and unsigned operands
                    product := unsigned(a) * unsigned(b);
                    push_dp_stack(std_logic_vector(product(15 downto 0)));

                when op_divide =>           -- ( a b -- a/b ) signed divide
                    pop_dp_stack(b);
                    pop_dp_stack(a);
                    if not restarted then
                        if signed(b) = 0 then
                            fault(divide_by_zero);
                        else
                            push_dp_stack(std_logic_vector(signed(a) / signed(b)));
                        end if;
                    end if;

                when op_branch =>           -- branch unconditionally to the address in the next cell
                    mem_addr := std_logic_vector(mp + 1);
                    state    := st_branch;

                when op_zero_branch =>      -- ( flag -- ) branch if flag = 0
                    pop_dp_stack(a);
                    if not restarted then
                        if unsigned(a) = 0 then
                            mem_addr := std_logic_vector(mp + 1);
                            state    := st_branch;
                        else
                            mp := mp + 1;   -- step over the target cell; next_instruction adds the rest
                        end if;
                    end if;

            end case;
        end proc_code;

    begin
        if Reset = '0' then
            reset_proc;
            error      := no_error;
            rd         <= read;
            AddressBus <= (others => '0');
            DataBus    <= (others => 'Z');

        elsif rising_edge(clock) then
            restarted := false;
            mem_write := false;

            case state is

                when st_execute =>          -- get and process instruction
                    proc_code(successful);
                    if not successful then  -- it wasn't a valid instruction
                        fault(invalid_instruction);
                    elsif state = st_execute then
                        next_instruction;   -- single cycle op code: it was completed above
                    end if;

                when st_fetch_data =>
                    push_dp_stack(DataBus);
                    next_instruction;

                when st_plus_store =>
                    mem_data  := std_logic_vector(unsigned(DataBus) + unsigned(temp1));
                    mem_write := true;
                    state     := st_write_done;

                when st_write_done =>
                    next_instruction;

                when st_branch =>
                    mp    := unsigned(DataBus);
                    state := st_execute;

            end case;

            -- Set up the bus for the coming clock period.  Address, data and
            -- the rd level all change on the same edge; a memory that needs
            -- set-up or hold time around the write strobe will need extra
            -- wait states here.
            if mem_write then
                rd      <= write;
                DataBus <= mem_data;
            else
                rd      <= read;             -- set read/write line to read
                DataBus <= (others => 'Z');  -- release the bus so memory can drive it
            end if;

            if state = st_execute then
                AddressBus <= std_logic_vector(mp);  -- output address of next instruction
            else
                AddressBus <= mem_addr;
            end if;
        end if;
    end process code;

end Proc_arch;