progs/parser1.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Mon, 20 Jul 2020 16:46:30 +0100
changeset 736 d3e477fe6c66
parent 93 4794759139ea
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
     1
// A naive bottom-up parser with backtracking
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
     2
//
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
     3
// Needs:
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
     4
//   :load matcher.scala
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
// some regular expressions
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
     7
val DIGIT = RANGE("0123456789")
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
     8
val NONZERODIGIT = RANGE("123456789")
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     9
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    10
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    11
val LPAREN = CHAR('(')
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    12
val RPAREN = CHAR(')')
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    13
val WHITESPACE = PLUS(RANGE(" \n"))
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    14
val OPS = RANGE("+-*")
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    15
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    16
// for classifying the strings that have been recognised
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
    17
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    18
abstract class Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    19
case object T_WHITESPACE extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    20
case object T_NUM extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    21
case class T_OP(s: String) extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    22
case object T_LPAREN extends Token
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    23
case object T_RPAREN extends Token
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    24
case class NT(s: String) extends Token
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    25
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    26
// lexing rules for arithmetic expressions
61
a80f0cf17f91 updated
Christian Urban <urbanc@in.tum.de>
parents: 54
diff changeset
    27
val lexing_rules: List[Rule[Token]]= 
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    28
  List((NUMBER, (s) => T_NUM),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    29
       (WHITESPACE, (s) => T_WHITESPACE),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    30
       (LPAREN, (s) => T_LPAREN),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    31
       (RPAREN, (s) => T_RPAREN),
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    32
       (OPS, (s) => T_OP(s.mkString)))
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    33
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    34
// the tokenizer
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    35
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    36
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
type Grammar = List[(String, List[Token])]
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    38
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    39
// grammar for arithmetic expressions
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    40
val grammar = 
54
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    41
  List ("F" -> List(T_NUM),
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    42
        "E" -> List(T_NUM),
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    43
        "E" -> List(NT("E"), T_OP("+"), NT("E")),
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    44
        "E" -> List(NT("E"), T_OP("-"), NT("E")),
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    45
        "E" -> List(NT("E"), T_OP("*"), NT("E")),    
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    46
        "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    47
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    48
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    49
def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    50
  ts1 match {
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    51
    case Nil => None
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    52
    case t::ts => 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    53
      if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    54
      else chop(ts, prefix, t::ts2)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    55
  }
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    56
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    57
// examples for chop 
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    58
chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)  
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    59
chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)  
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    60
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    61
def replace[A](ts: List[A], out: List[A], in: List [A]) = 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    62
  chop(ts, out, Nil) match {
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    63
    case None => None
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    64
    case Some((before, after)) => Some(before ::: in ::: after)
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    65
  }  
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    66
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    67
def parse(g: Grammar, ts: List[Token]) : Boolean = {
54
485f38b530ab updated
Christian Urban <urbanc@in.tum.de>
parents: 51
diff changeset
    68
  println(ts)
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    69
  if (ts == List(NT("E"))) true
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    70
  else {
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    71
    val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    72
    tss.flatten.exists(parse(g, _))
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    73
  }
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    74
}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    75
 
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    76
def parser(g: Grammar, s: String) = {
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    77
  println("\n")
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    78
  parse(g, Tok.fromString(s))
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    79
}
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    80
  
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    81
51
6fe4facb56a6 updated
Christian Urban <urbanc@in.tum.de>
parents: 50
diff changeset
    82
71
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    83
parser(grammar, "2 + 3 *    4 +       1")
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    84
parser(grammar, "(2 + 3) * (4 + 1)")
7717f20f0504 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 61
diff changeset
    85
parser(grammar, "(2 + 3) * 4 (4 + 1)")
50
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    86
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    87
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    88