regexp3.scala
changeset 28 f63ba92a7d78
parent 26 06be91bbb1cd
child 29 774007c4b1b3
equal deleted inserted replaced
27:0f05e90b960d 28:f63ba92a7d78
    // NOTE(review): this is a two-column changeset view -- every source line is shown
    // twice, each copy prefixed by its line number in one of the two revisions (27 and 28);
    // the first copy of a differing pair appears to be the replaced (older) text.
    //
    // nullable recurses over the structure of r and returns a Boolean. The base cases
    // (EMPTY and STAR true, NULL and CHAR false) fit "r accepts the empty string";
    // presumably that is the intent -- confirm against the Rexp definition.
    24 def nullable (r: Rexp) : Boolean = r match {
    24 def nullable (r: Rexp) : Boolean = r match {
    25   case NULL => false
    25   case NULL => false
    26   case EMPTY => true
    26   case EMPTY => true
    27   case CHAR(_) => false
    27   case CHAR(_) => false
    // ALT: true as soon as either alternative is nullable.
    28   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    28   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    // NOTE(review): the second copy of the SEQ case no longer recurses on nullable(r2).
    // It is character-for-character the SEQ case shown further down among the cases that
    // recurse on zeroable, and it only ever returns a zeroable(...) result. With the
    // EMPTY => false case shown there, SEQ(EMPTY, EMPTY) would go from true to false.
    // Looks like a copy-paste slip; the first copy (nullable(r1) && nullable(r2)) is the
    // form consistent with the other cases here -- confirm before relying on it.
    29   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    29   case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1)
    30   case STAR(_) => true
    30   case STAR(_) => true
    // NOT: plain negation of the inner result.
    31   case NOT(r) => !(nullable(r))
    31   case NOT(r) => !(nullable(r))
    32 }
    32 }
    33 
    33 
    34 // tests whether a regular expression 
    34 // tests whether a regular expression 
    37   case NULL => true
    37   case NULL => true
    38   case EMPTY => false
    38   case EMPTY => false
    39   case CHAR(_) => false
    39   case CHAR(_) => false
    40   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
    40   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
    41   case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1)
    41   case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1)
    42       //zeroable(r1) || zeroable(r2)
    42   //case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
    43   case STAR(_) => false
    43   case STAR(_) => false
    44   case NOT(r) => !(zeroable(r))
    44   case NOT(r) => !(zeroable(r))
    45 }
    45 }
    46 
    46 
    47 
    47 
    // Regular-expression constants used by the lexing rules. RANGE, PLUS, LETTER and DIGITS
    // are defined outside the visible text. String literals such as "0" and "/*" appear
    // where the other arguments are Rexp values -- presumably an implicit String-to-Rexp
    // conversion is in scope; confirm.
    // RANGE built from the characters '1' through '9'.
    88 val NONZERODIGITS = RANGE("123456789".toList)
    88 val NONZERODIGITS = RANGE("123456789".toList)
    89 
    89 
    // IDENT: LETTER followed by STAR of (LETTER or DIGITS).
    90 val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS)))
    90 val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS)))
    // NUMBER: NONZERODIGITS followed by STAR(DIGITS), or the literal "0".
    91 val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
    91 val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
    // WHITESPACE: RANGE over exactly two characters, space and newline.
    92 val WHITESPACE = RANGE(" \n".toList)
    92 val WHITESPACE = RANGE(" \n".toList)
       
    // WHITESPACES exists only in the second column (inserted line): PLUS applied to WHITESPACE.
    93 val WHITESPACES = PLUS(WHITESPACE)
    93 
    94 
    // ALL: LETTER, DIGITS or WHITESPACE.
    94 val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE)
    95 val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE)
    95 
    96 
    // COMMENT: "/*", then NOT(STAR(ALL) "*/" STAR(ALL)), then "*/". The NOT wraps the
    // pattern "anything, then */, then anything", so the middle part is presumably meant
    // to be text containing no "*/" -- confirm against how NOT is matched.
    96 val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
    97 val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
    97 
    98 
   // Token variants; each extends Token, which is declared outside the visible text.
   // The lexing rules construct them from the characters handed to their action function.
   100 case object T_WHITESPACE extends Token
   101 case object T_WHITESPACE extends Token
   101 case class T_IDENT(s: String) extends Token
   102 case class T_IDENT(s: String) extends Token
   102 case class T_OP(s: String) extends Token
   103 case class T_OP(s: String) extends Token
   // T_NUM carries an Int; the NUMBER rule fills it via s.mkString.toInt.
   103 case class T_NUM(n: Int) extends Token
   104 case class T_NUM(n: Int) extends Token
   104 case class T_KEYWORD(s: String) extends Token
   105 case class T_KEYWORD(s: String) extends Token
       
   // T_COMMENT exists only in the second column (inserted line); the inserted COMMENT rule returns it.
   106 case object T_COMMENT extends Token
   105 
   107 
   106 
   108 
   107 // an example list of rules
   109 // an example list of rules
   // Rule pairs a regular expression with an action of type List[Char] => Token;
   // presumably the action receives the characters the expression matched -- confirm in munch.
   108 type Rule = (Rexp, List[Char] => Token)
   110 type Rule = (Rexp, List[Char] => Token)
   109 
   111 
   112        ("then", (s) => T_KEYWORD(s.mkString)),
   114        ("then", (s) => T_KEYWORD(s.mkString)),
   113        ("else", (s) => T_KEYWORD(s.mkString)),
   115        ("else", (s) => T_KEYWORD(s.mkString)),
   114        ("+", (s) => T_OP(s.mkString)),
   116        ("+", (s) => T_OP(s.mkString)),
   115        (IDENT, (s) => T_IDENT(s.mkString)),
   117        (IDENT, (s) => T_IDENT(s.mkString)),
   116        (NUMBER, (s) => T_NUM(s.mkString.toInt)),
   118        (NUMBER, (s) => T_NUM(s.mkString.toInt)),
   117        (WHITESPACE, (s) => T_WHITESPACE))
   119        (WHITESPACES, (s) => T_WHITESPACE))
   118 
   120 
   119 
   121 
   // Always throws IllegalArgumentException with the message "Could not lex " followed by s;
   // it never returns normally.
   120 def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
   122 def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
   121 
   123 
   122 def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = 
   124 def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = 
   138   case _ => lex_one(rs, s) match {
   140   case _ => lex_one(rs, s) match {
   139     case (rest, t) => t :: lex_all(rs, rest) 
   141     case (rest, t) => t :: lex_all(rs, rest) 
   140   }
   142   }
   141 }
   143 }
   142 
   144 
       
   // Rule list as it appears in the second column only (inserted lines; the whitespace-only
   // lines are the empty first column). Entries in order: the keywords "if", "then", "else",
   // the operator "+", then IDENT, NUMBER, WHITESPACES and COMMENT.
   // Whether list order decides between competing rules cannot be told from the visible text.
   145 val rules: List[Rule]= 
       
   146   List(("if", (s) => T_KEYWORD(s.mkString)),
       
   147        ("then", (s) => T_KEYWORD(s.mkString)),
       
   148        ("else", (s) => T_KEYWORD(s.mkString)),
       
   149        ("+", (s) => T_OP(s.mkString)),
       
   150        (IDENT, (s) => T_IDENT(s.mkString)),
       
   // toInt throws NumberFormatException when the digit string does not fit in an Int.
   151        (NUMBER, (s) => T_NUM(s.mkString.toInt)),
       
   // The last two actions ignore their argument and return a fixed token object.
   152        (WHITESPACES, (s) => T_WHITESPACE),
       
   153        (COMMENT, (s) => T_COMMENT))
       
   154 
       
   // Top-level scratch/test statements: lex sample inputs with lex_all and print the result,
   // plus bare expressions probing COMMENT. der, matcher and starts_with are defined outside
   // the visible text. Lines carrying only whitespace are the empty first column of inserted lines.
   155 println(lex_all(rules, "/*ifff if */ hhjj /*34 */".toList))
   143 
   156 
   144 
   157 
   145 println(lex_all(rules, "if true then 42 else +".toList))
   158 munch(COMMENT, (s) => T_COMMENT , "/*ifff if */ hhjj /*34 */".toList, Nil)
       
   // COMMENT2 is the NOT(...) sub-expression of COMMENT on its own.
   159 val COMMENT2 = NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))
       
   160 
       
   161 der('/', COMMENT)
       
   162 zeroable(der('/', COMMENT))
       
   163 zeroable(der('a', COMMENT2))
       
   164 
       
   165 matcher(COMMENT2, "ifff if 34")
       
   // NOTE(review): munch is declared with four parameters (r, action, s, t) but is called
   // here with three and no action -- this does not match the visible signature unless an
   // overload exists elsewhere; confirm.
   166 munch(COMMENT2, "ifff if 34".toList, Nil)
       
   167 starts_with(COMMENT2, 'i')
       
   // NOTE(review): regs is not defined anywhere in the visible text (the list above is
   // named rules) -- possibly a leftover name; confirm.
   168 lex_all(regs, "ifff if 34".toList)
       
   169 lex_all(regs, "ifff $ if 34".toList)
       
   170 
       
   171 println(lex_all(rules, "/* if true then */ then 42 else +".toList))
       
   172 println(lex_all(rules, "if true then then 42 else +".toList))
   146 println(lex_all(rules, "ifff if     34 34".toList))
   173 println(lex_all(rules, "ifff if     34 34".toList))
   // '$' occurs in none of the string literals used by the rules; whether LETTER covers it
   // is not visible here, so this input presumably exercises the failure path -- confirm.
   147 println(lex_all(rules, "ifff $ if 34".toList))
   174 println(lex_all(rules, "ifff $ if 34".toList))
   148 
   175 
   149 
   176