parser.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Tue, 20 Nov 2012 22:06:05 +0000
changeset 65 ade6af51402c
parent 61 a80f0cf17f91
permissions -rw-r--r--
tuned
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
     1
:load matcher.scala
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     2
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     3
// some regular expressions
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     4
val DIGIT = RANGE("0123456789".toList)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
val NONZERODIGIT = RANGE("123456789".toList)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
val LPAREN = CHAR('(')
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     9
val RPAREN = CHAR(')')
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    10
val WHITESPACE = PLUS(RANGE(" \n".toList))
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    11
val OPS = RANGE("+-*".toList)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    12
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    13
// for classifying the strings that have been recognised
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
    14
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    15
abstract class Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    16
case object T_WHITESPACE extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    17
case object T_NUM extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    18
case class T_OP(s: String) extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    19
case object T_LPAREN extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    20
case object T_RPAREN extends Token
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    21
case class NT(s: String) extends Token
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    22
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    23
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
    24
def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = 
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    25
  tokenize(rs, s.toList).filterNot(_ match {
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    26
    case T_WHITESPACE => true
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    27
    case _ => false
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    28
  })
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    29
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    30
// lexing rules for arithmetic expressions
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
    31
val lexing_rules: List[Rule[Token]]= 
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    32
  List((NUMBER, (s) => T_NUM),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    33
       (WHITESPACE, (s) => T_WHITESPACE),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    34
       (LPAREN, (s) => T_LPAREN),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    35
       (RPAREN, (s) => T_RPAREN),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    36
       (OPS, (s) => T_OP(s.mkString)))
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    38
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    39
type Grammar = List[(String, List[Token])]
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    40
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    41
// grammar for arithmetic expressions
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    42
val grammar = 
54
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    43
  List ("F" -> List(T_NUM),
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    44
        "E" -> List(T_NUM),
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    45
        "E" -> List(NT("E"), T_OP("+"), NT("E")),
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    46
        "E" -> List(NT("E"), T_OP("-"), NT("E")),
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    47
        "E" -> List(NT("E"), T_OP("*"), NT("E")),    
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    48
        "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    49
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    51
def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    52
  ts1 match {
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    53
    case Nil => None
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    54
    case t::ts => 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    55
      if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    56
      else chop(ts, prefix, t::ts2)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    57
  }
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    58
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    59
// examples
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    60
chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)  
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    61
chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)  
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    62
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    63
def replace[A](ts: List[A], out: List[A], in: List [A]) = 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    64
  chop(ts, out, Nil) match {
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    65
    case None => None
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    66
    case Some((before, after)) => Some(before ::: in ::: after)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    67
  }  
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    68
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    69
def parse(g: Grammar, ts: List[Token]) : Boolean = {
54
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    70
  println(ts)
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    71
  if (ts == List(NT("E"))) true
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    72
  else {
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    73
    val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    74
    tss.flatten.exists(parse(g, _))
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    75
  }
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    76
}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    77
 
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
    78
def parser(g: Grammar, rs: List[Rule[Token]], s: String) = {
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    79
  println("\n")
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    80
  parse(g, tokenizer(rs, s))
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    81
}
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    82
  
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    83
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    84
54
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    85
parser(grammar, lexing_rules, "2 + 3 *    4 +       1")
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    86
parser(grammar, lexing_rules, "(2 + 3) * (4 + 1)")
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    87
parser(grammar, lexing_rules, "(2 + 3) * 4 (4 + 1)")
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    88
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    89
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    90