当前位置: 首页 > news >正文

Haskell写的Parser

干货第二波,Haskell实现的Parser, 支持运算语句和备注等,输出可以作为Interpreter的输入

Parser combinator 选用的是 ReadP.

 

module Parser.Impl where

import SubsAst
import Text.ParserCombinators.ReadP as P
import Data.Char
import Control.Applicative

data ParseError = ParseError String
                deriving (Show, Eq)

-- allowed chars which should be allowed in pString
allowedChars :: ReadP Char
allowedChars = do
                  _ <- char '\\'
                  c <- satisfy (`elem` "\'nt\\")
                  case c of
                    'n' -> return '\n'
                    't' -> return '\t'
                    x -> return x

-- keywords which should not be used as a var name no when assign or get var
reserved :: [String]
reserved = ["if", "for", "of", "true","false","undefined"]

-- helper function for read chars of variable name
checkChar :: Char -> Char -> Bool
checkChar sym x = x == sym

-- return Var
pIdent :: ReadP Expr
pIdent = token $ do
            fist <- satisfy isLetter
            send <- munch (\x -> isLetter x || isDigit x || checkChar '_' x)
            let idt = fist : send
            if idt `notElem` reserved then return $ Var idt
            else fail "keyword can not be used as ident"

-- doing get Ident for assign
pIdget :: ReadP Ident
pIdget = do
            fist <- satisfy isLetter
            send <- munch (\x -> isLetter x || isDigit x || checkChar '_' x)
            let idt = fist : send
            if idt `notElem` reserved then return idt
            else fail "keyword can not be used as ident"

-- for readin number
pNumber :: ReadP Expr
pNumber = do
             sym <- option ' ' (char '-')
             number <- munch1 isDigit
             if length number <= 8 then
                return (Number (read (sym : number) :: Int))
             else fail "invalid number"

-- for readin string
pString :: ReadP Expr
pString = do
            _ <- char '\''
            s <- P.many (allowedChars
                     <|> satisfy (\x -> isPrint x && notElem x "\t\'\\'")
                     <|> (do
                             _ <- char '\\'
                             _ <- char '\n'
                             return '\0'))
            _ <- char '\''
            return (String s)

-- comment
pComment :: ReadP String
pComment = between (string "//") (char '\n') (munch ( /= '\n'))

-- token
-- we keep the skipMany1 pComment for skip comments before token but
--    it might cause time out sometimes
token :: ReadP a -> ReadP a
token p = do
             skipSpaces <|> skipMany1 pComment
             a <- p
             skipSpaces <|> skipMany1 pComment
             return a

-- symbol is using for read in a symbol or keyword such as "for" and "if"
symbol :: String -> ReadP String
symbol s = token $ string s

-- read true
pTrue :: ReadP Expr
pTrue = do
           _ <- token $ string "true"
           return TrueConst

-- read false
pFalse :: ReadP Expr
pFalse = do
            _ <- token $ string "false"
            return FalseConst

-- read Undefined
pUndefined :: ReadP Expr
pUndefined = do
                _ <- token $ string "undefined"
                return Undefined

-- stands for the Eq of our grammar tree
pAssign :: ReadP Expr
pAssign = do
             idt <- token pIdget
             _ <- symbol "="
             val <- token pExpr0
             return (Assign idt val)

-- array
pArray :: ReadP Expr
pArray = (do
           _ <- symbol "["
           exps <- pExprs
           _ <- symbol "]"
           return (Array exps))
     <++ (do
            _ <- symbol "["
            _ <- symbol "]"
            return (Array [Undefined]))

-- stands for the Expr of root of grammar tree
pExpr :: ReadP Expr
pExpr = (do
            a <- token pExpr1
            b <- token pouterComma
            return (Comma a b))
        <++ token pAssign
        <++ token pExpr1

-- stands for the Expr0 of grammar tree,with precidence of 0
pExpr0 :: ReadP Expr
pExpr0 = (do
             _ <- symbol "="
             token pExpr0)
      <++ token pExpr1

-- stands for the Expr1 of grammar tree,with precidence of 1
pExpr1 :: ReadP Expr
pExpr1 = (do
             exp2 <- token pExpr2
             _ <- symbol "==="
             exp1 <- token pExpr2
             pOpExpr1 (Call "===" [exp2,exp1])) <++
         (do
             exp2 <- token pExpr2
             _ <- symbol "<"
             exp1 <- token pExpr2
             pOpExpr1 (Call "<" [exp2,exp1])) <++
         token pAssign <++
         token pExpr2

-- to help pExpr1 deal with left associativity
pOpExpr1 :: Expr -> ReadP Expr
pOpExpr1 a = (do
                 _ <- symbol "==="
                 exp2 <- token pExpr1
                 return (Call "===" [a,exp2])) <++
             (do
                 _ <- symbol "<"
                 exp2 <- token pExpr1
                 return (Call "<" [a,exp2])) <++
             return a

-- stands for the Expr2 of grammar tree,with precidence of 2
pExpr2 :: ReadP Expr
pExpr2  = (do
              a <- token pExpr3
              _ <- symbol "+"
              b <- token pExpr3
              pOpExpr2 (Call "+" [a,b])) <++
          (do
              a <- token pExpr3
              _ <- symbol "-"
              b <- token pExpr3
              pOpExpr2 (Call "-" [a,b])) <++
          token pExpr3

-- to help pExpr2 deal with left associativity
pOpExpr2 :: Expr -> ReadP Expr
pOpExpr2 a = (do
                 _ <- symbol "+"
                 exp2 <- token pExpr2
                 return (Call "+" [a,exp2])) <++
             (do
                 _ <- symbol "-"
                 exp2 <- token pExpr2
                 return (Call "-" [a,exp2])) <++
             return a

-- stands for the Expr3 of grammar tree,with precidence of 3
pExpr3 :: ReadP Expr
pExpr3  = (do
              a <- token pExpr4
              _ <- symbol "*"
              b <- token pExpr4
              pOpExpr3 (Call "*" [a,b])) <++
          (do
              a <- token pExpr4
              _ <- symbol "%"
              b <- token pExpr4
              pOpExpr3 (Call "%" [a,b])) <++
          token pExpr4

-- to help pExpr3 deal with left associativity
pOpExpr3 :: Expr -> ReadP Expr
pOpExpr3 a = (do
                 _ <- symbol "*"
                 exp2 <- token pExpr3
                 return (Call "*" [a,exp2])) <++
             (do
                 _ <- symbol "%"
                 exp2 <- token pExpr3
                 return (Call "%" [a,exp2])) <++
             return a

-- stands for the Expr4 of grammar tree,with precidence of 4
pExpr4 :: ReadP Expr
pExpr4 = (do
             _ <- symbol "("
             e <- token pExpr
             _ <- symbol ")"
             return e)
         <++ pArray
         <++(do
                _ <- symbol "["
                e <- token pComprFor
                _ <- symbol "]"
                return e)
         <++ (do
                 idt <- token pIdget
                 _ <- symbol "("
                 e <- token pExprs
                 _ <- symbol ")"
                 return $ Call idt e)
         <++ pNumber
         <++ pIdent
         <++ pString
         <++ pTrue
         <++ pFalse
         <++ pUndefined

-- stands for the OuterComma of the grammar tree
pouterComma :: ReadP Expr
pouterComma = do
                 _ <- symbol ","
                 token pExpr

-- helpfunction for pcommaExprs and pExprs
commaHelper :: ReadP [Expr]
commaHelper = do
                exp1 <- token pExpr1
                com <- token pcommaExprs
                return (exp1:com)

-- stands for the commaExprs of the grammar tree
pcommaExprs :: ReadP [Expr]
pcommaExprs = (do
                  _ <- symbol ","
                  commaHelper)
          <++ (do
                  _ <- symbol ","
                  exp1 <- token pExpr1
                  return [exp1])

-- stands for the Exprs of the grammar tree
pExprs :: ReadP [Expr]
pExprs = (do
            exp1 <- token pExpr1
            return [exp1])
     <++ commaHelper

-- stands for ArrayFor of the grammar tree
pArrayFor :: ReadP ArrayCompr
pArrayFor = do
              _ <- symbol "for"
              _ <- symbol "("
              id' <- token pIdget
              _ <- symbol "of"
              exp1 <- token pExpr1
              _ <- symbol ")"
              ac <- token pArrayCompr
              return (ACFor id' exp1 ac)

-- stands for the ArrayIf of the grammar tree
pArrayIf :: ReadP ArrayCompr
pArrayIf = do
             _ <- symbol "if"
             _ <- symbol "("
             exp1 <- token pExpr1
             _ <- symbol ")"
             ac <- token pArrayCompr
             return (ACIf exp1 ac)

-- organize the ACIf ,ACFor and ACBody
pArrayCompr :: ReadP ArrayCompr
pArrayCompr = token pArrayIf
              <++ token pArrayFor
              <++ (do
                     ex <- token pExpr1
                     return (ACBody ex))

-- stands for the ArrayCompr of grammar tree
pComprFor :: ReadP Expr
pComprFor = do
               ar <- pArrayFor
               return (Compr ar)

-- out put the parse result
parseString :: String -> Either ParseError Expr
parseString str = if null (readP_to_S pExpr str) then
     Left(ParseError "Invalid expression") else
     (do
        let legalstr = [x | x <- readP_to_S pExpr str,snd x == ""]
        if null legalstr then Left( ParseError "Invalid expression") else
           (do
               let stri = fst (head legalstr)
               case stri of
                 String s -> Right (String [x | x <- s, x `notElem` "\NUL"])
                 a -> Right a))

  

转载于:https://www.cnblogs.com/hanani/p/9981211.html

相关文章:

  • Java String.getBytes()编码
  • smm架构的优势
  • 不学无数——SpringBoot入门Ⅲ
  • 比特币的暴跌史
  • 微信小程序直播,腾讯云直播+微信小程序实现实时直播
  • POI导出数据以Excel的方式录入,下载
  • 如何实现MetaMask签名授权后DAPP一键登录功能?
  • 双十一流量洪峰 支撑阿里核心业务的云数据库揭秘
  • python编程入门----测试与类编写
  • 项目(八) Jenkins持续集成与构建
  • Dashboard安装配置
  • vue中的slot
  • String和StringBuilder、StringBuffer的区别?
  • 关于Numba开源库(Python语法代码加速处理,看过一个例子,速度可提高6倍)
  • Aibee完成A轮6000万美元融资 宣布前阿里达摩院朱胜火加盟
  • 实现windows 窗体的自己画,网上摘抄的,学习了
  • [数据结构]链表的实现在PHP中
  • 【划重点】MySQL技术内幕:InnoDB存储引擎
  • 【刷算法】从上往下打印二叉树
  • Android单元测试 - 几个重要问题
  • Angular 响应式表单之下拉框
  • CEF与代理
  • CNN 在图像分割中的简史:从 R-CNN 到 Mask R-CNN
  • CSS3 聊天气泡框以及 inherit、currentColor 关键字
  • CSS居中完全指南——构建CSS居中决策树
  • egg(89)--egg之redis的发布和订阅
  • Leetcode 27 Remove Element
  • Linux编程学习笔记 | Linux多线程学习[2] - 线程的同步
  • opencv python Meanshift 和 Camshift
  • RedisSerializer之JdkSerializationRedisSerializer分析
  • scala基础语法(二)
  • vue-cli在webpack的配置文件探究
  • 离散点最小(凸)包围边界查找
  • 前端自动化解决方案
  • 使用Gradle第一次构建Java程序
  • 系统认识JavaScript正则表达式
  • 正则与JS中的正则
  • 组复制官方翻译九、Group Replication Technical Details
  • ​​​​​​​GitLab 之 GitLab-Runner 安装,配置与问题汇总
  • #LLM入门|Prompt#1.7_文本拓展_Expanding
  • #微信小程序:微信小程序常见的配置传值
  • ( 用例图)定义了系统的功能需求,它是从系统的外部看系统功能,并不描述系统内部对功能的具体实现
  • (4)logging(日志模块)
  • (Matalb分类预测)GA-BP遗传算法优化BP神经网络的多维分类预测
  • (Matalb时序预测)WOA-BP鲸鱼算法优化BP神经网络的多维时序回归预测
  • (二) Windows 下 Sublime Text 3 安装离线插件 Anaconda
  • (二)fiber的基本认识
  • (二)什么是Vite——Vite 和 Webpack 区别(冷启动)
  • (二)正点原子I.MX6ULL u-boot移植
  • (附源码)计算机毕业设计高校学生选课系统
  • (个人笔记质量不佳)SQL 左连接、右连接、内连接的区别
  • (三)elasticsearch 源码之启动流程分析
  • (转)Mysql的优化设置
  • (转)创业家杂志:UCWEB天使第一步
  • .net 4.0 A potentially dangerous Request.Form value was detected from the client 的解决方案