Virtual Machine

ちょっと趣向を変えてコンパイラをつくってみることにした。とはいってもx86をターゲットにするなんてのは無理の無理無理なんで,Virtual Machineを作ってそれをターゲットにすることに。
で,VMを実装してみた。いろいろ考えた結果,Stack Machineです。

module VSM where
import Prelude hiding (catch)
import Data.Int
import Data.Char
import Data.Bits
import System.IO
import Control.Exception
import Data.Typeable
import Data.Array.MArray
import Data.Array.IArray
import Data.Array.IO

data Op = Add | Sub | Mul | Div | Mod
        | And | Or | Not | Xor | Ash | Lsh
        | Neg 
        | Load | Store | LLoad | LStore
        | Push VMWord | Pop | Flip | Dup
        | Jump | Branch | Call | Return
        | Break
        | VCall
          deriving Show

data VSMException = BreakException
                    deriving Typeable

type VMWord = Int32
type IP = VMWord
type FP = VMWord
type Stack = [VMWord]
type DMem = IOUArray VMWord VMWord
type Ports = IOArray VMWord Handle
type IMem = Array VMWord Op
type State = (IP,FP,Stack)
type Mem = (IMem,DMem,Ports)

dMemSize :: VMWord
dMemSize = 10000
run :: [Op] -> IO State
run codes =
    let imem = listArray (0,fromIntegral (length codes - 1)) codes
    in do
      dmem <- newArray (0,dMemSize-1) 0
      ports <- newArray_ (0,100)
      runVM (imem,dmem,ports) (0,0,[])
      

runVM :: Mem -> State -> IO State
runVM m@(im,dm,ps) s@(ip,fp,st)
    = catchDyn (do {(ip',fp',st') <- exec (im ! ip) (dm,ps) s
                   ;runVM m (ip',fp',st')
                   })
        (\e -> case (e::VSMException) of
                   BreakException -> return s)

exec :: Op -> (DMem,Ports) -> State -> IO State
exec Add m s = execBinOp Add m s
exec Sub m s = execBinOp Sub m s
exec Mul m s = execBinOp Mul m s
exec Div m s = execBinOp Div m s
exec Mod m s = execBinOp Mod m s
exec And m s = execBinOp And m s
exec Or  m s = execBinOp Or  m s
exec Not m s = execUnOp  Not m s
exec Xor m s = execBinOp Xor m s
exec Ash m s = execBinOp Ash m s
exec Lsh m s = execBinOp Lsh m s
exec Neg m s = execUnOp  Neg m s
exec Load (dm,_) (ip,fp,v0:vs) = do
    v <- readArray dm v0
    return (ip+1,fp,v:vs)
exec Store (dm,_) (ip,fp,v1:v0:vs) = writeArray dm v0 v1 >> return (ip+1,fp,vs)
exec LLoad (dm,_) (ip,fp,v0:vs) = do
    v <- readArray dm (fp + v0)
    return (ip+1,fp,v:vs)
exec LStore (dm,_) (ip,fp,v1:v0:vs) = do
    writeArray dm (fp + v0) v1
    return (ip+1,fp,vs)
exec (Push v) _ (ip,fp,vs) = return (ip+1,fp,v:vs)
exec Pop _ (ip,fp,v:vs) = return (ip+1,fp,vs)
exec Flip _ (ip,fp,v1:v0:vs) = return (ip+1,fp,v0:v1:vs)
exec Dup _ (ip,fp,v:vs) = return (ip+1,fp,v:v:vs)
exec Jump _ (ip,fp,v0:vs) = return (v0,fp,vs)
exec Branch _ (ip,fp,v1:v0:vs) = if v0 == 0
                                 then return (ip+1,fp,vs)
                                 else return (ip+v1,fp,vs)
exec Call _ (ip,fp,v1:v0:vs) = return (v0,v1,fp:ip:vs)
exec Return _ (ip,fp,v1:v0:vs) = return (v0,v1,vs)
exec Break _ _ = throwDyn BreakException
exec VCall m (ip,fp,v0:vs) = do
    (ip,fp,vs') <- vOp v0 m (ip,fp,vs)
    return (ip+1,fp,vs')


execUnOp :: Op -> (DMem,Ports) -> State -> IO State
execUnOp op _ (ip,fp,v0:vs) = let v = hOp1 op v0
                              in return (ip+1,fp,v:vs)
hOp1 Not v = complement v
hOp1 Neg v | v < 0 = 1
           | otherwise = 0

execBinOp :: Op -> (DMem,Ports) -> State -> IO State
execBinOp op _ (ip,fp,v1:v0:vs) = let v = hOp2 op v0 v1
                               in return (ip+1,fp,v:vs)
hOp2 :: Op -> VMWord -> VMWord -> VMWord
hOp2 Add a b = (+) a b
hOp2 Sub a b = (-) a b
hOp2 Mul a b = (*) a b
hOp2 Div a b = div a b
hOp2 Mod a b = mod a b
hOp2 And a b = (.&.) a b
hOp2 Or  a b = (.|.) a b
hOp2 Xor a b = xor a b
hOp2 Ash a b = shift a (fromIntegral b)
hOp2 Lsh a b | a >= 0 = shift a (fromIntegral b)
             | otherwise = complement (shift (complement a) (fromIntegral b))

vOp :: VMWord -> (DMem,Ports) -> State -> IO State
-- put-char
vOp 0 (_,ps) (ip,fp,v1:v0:vs) = do
    h <- readArray ps v0
    hPutChar h (chr $ fromIntegral v1)
    return (ip+1,fp,vs)
-- get-char
vOp 1 (_,ps) (ip,fp,v0:vs) = do
    h <- readArray ps v0
    c <- hGetChar h
    return (ip+1,fp,(fromIntegral $ ord c):vs)

Typeableをderivingしているのでghcコンパイルする場合は-XDeriveDataTypeableというオプションをつける必要がある。ちなみに下記のようにすればオプションをつける必要はない。

data VSMException = BreakException
instance Typeable VSMException where
   typeOf a = mkTyConApp (mkTyCon "VSMException") []

今後はこのVMをターゲットにしたScheme風言語のコンパイラを作成する予定。VM自体もコンパイラ作成しながら変更することでちゃんとしたものなる予定。ちなみにいつ完成するかは未定。。。