regexp2.scala
changeset 26 06be91bbb1cd
child 28 f63ba92a7d78
equal deleted inserted replaced
25:94133a0e62ed 26:06be91bbb1cd
       
     1 // regular expressions
       
     2 abstract class Rexp
       
     3 
       
     4 case object NULL extends Rexp
       
     5 case object EMPTY extends Rexp
       
     6 case class CHAR(c: Char) extends Rexp
       
     7 case class ALT(r1: Rexp, r2: Rexp) extends Rexp
       
     8 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
       
     9 case class STAR(r: Rexp) extends Rexp
       
    10 case class NOT(r: Rexp) extends Rexp
       
    11 
       
    12 
       
    13 // some convenience for typing in regular expressions
       
    14 def charlist2rexp(s : List[Char]) : Rexp = s match {
       
    15   case Nil => EMPTY
       
    16   case c::Nil => CHAR(c)
       
    17   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
    18 }
       
    19 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
       
    20 
       
    21 
       
    22 // nullable function: tests whether the regular 
       
    23 // expression can recognise the empty string
       
    24 def nullable (r: Rexp) : Boolean = r match {
       
    25   case NULL => false
       
    26   case EMPTY => true
       
    27   case CHAR(_) => false
       
    28   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    29   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    30   case STAR(_) => true
       
    31   case NOT(r) => !(nullable(r))
       
    32 }
       
    33 
       
    34 // tests whether a regular expression 
       
    35 // recognises nothing
       
    36 def zeroable (r: Rexp) : Boolean = r match {
       
    37   case NULL => true
       
    38   case EMPTY => false
       
    39   case CHAR(_) => false
       
    40   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
       
    41   case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
       
    42   case STAR(_) => false
       
    43   case NOT(r) => !(zeroable(r))
       
    44 }
       
    45 
       
    46 
       
    47 // derivative of a regular expression w.r.t. a character
       
    48 def der (c: Char, r: Rexp) : Rexp = r match {
       
    49   case NULL => NULL
       
    50   case EMPTY => NULL
       
    51   case CHAR(d) => if (c == d) EMPTY else NULL
       
    52   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    53   case SEQ(r1, r2) => 
       
    54     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    55     else SEQ(der(c, r1), r2)
       
    56   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    57   case NOT(r) => NOT(der (c, r))
       
    58 }
       
    59 
       
    60 // derivative w.r.t. a string (iterates der)
       
    61 def ders (s: List[Char], r: Rexp) : Rexp = s match {
       
    62   case Nil => r
       
    63   case c::s => ders(s, der(c, r))
       
    64 }
       
    65 
       
    66 // main matcher function
       
    67 def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
       
    68 
       
    69 
       
    70 // regular expression for specifying 
       
    71 // ranges of characters
       
    72 def RANGE(s : List[Char]) : Rexp = s match {
       
    73   case Nil => NULL
       
    74   case c::Nil => CHAR(c)
       
    75   case c::s => ALT(CHAR(c), RANGE(s))
       
    76 }
       
    77 
       
    78 //one or more
       
    79 def PLUS(r: Rexp) = SEQ(r, STAR(r))
       
    80 
       
    81 
       
    82 //some regular expressions
       
    83 val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
       
    84 val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
       
    85 val LETTER = ALT(LOWERCASE, UPPERCASE)
       
    86 val DIGITS = RANGE("0123456789".toList)
       
    87 val NONZERODIGITS = RANGE("123456789".toList)
       
    88 
       
    89 val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS)))
       
    90 val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
       
    91 val WHITESPACE = RANGE(" \n".toList)
       
    92 
       
    93 val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE)
       
    94 
       
    95 val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
       
    96 
       
    97 println(matcher(NUMBER, "0"))
       
    98 println(matcher(NUMBER, "01"))
       
    99 println(matcher(NUMBER, "123450"))
       
   100 
       
   101 println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
       
   102 println(matcher(ALT(STAR("a"), STAR("b")), ""))
       
   103 println(matcher("abc", ""))
       
   104 println(matcher(STAR(ALT(EMPTY, "a")), ""))
       
   105 println(matcher(STAR(EMPTY), "a"))
       
   106 println(matcher("cab","cab"))
       
   107 println(matcher(STAR("a"),"aaa"))
       
   108 println(matcher("cab" ,"cab"))
       
   109 println(matcher(STAR("a"),"aaa"))
       
   110 
       
   111 println(matcher(COMMENT, "/* */"))
       
   112 println(matcher(COMMENT, "/* foobar comment */"))
       
   113 println(matcher(COMMENT, "/* test */ test */"))
       
   114 
       
   115 // an example list of regular expressions
       
   116 val regs: List[Rexp]=  List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACE) 
       
   117 
       
   118 def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
       
   119 
       
   120 def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = s match {
       
   121   case Nil if (nullable(r)) => Some(Nil, t)
       
   122   case Nil => None
       
   123   case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, t)
       
   124   case c::s if (zeroable(der (c, r))) => None
       
   125   case c::s => munch(der (c, r), s, t ::: List(c))
       
   126 }
       
   127 
       
   128 def lex_one (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
       
   129  val somes = regs.map { munch(_, s, Nil) } .flatten
       
   130  if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
       
   131 }
       
   132 
       
   133 def lex_all (regs: List[Rexp], s: List[Char]) : List[String] = s match {
       
   134   case Nil => Nil
       
   135   case _ => lex_one(regs, s) match {
       
   136     case (rest, s) => s.mkString :: lex_all(regs, rest) 
       
   137   }
       
   138 }
       
   139 
       
   140 println(lex_all(rules, "if true then 42 else +".toList))
       
   141 println(lex_all(rules, "ifff if     34 34".toList))
       
   142 println(lex_all(rules, "ifff $ if 34".toList))
       
   143