| author | Christian Urban <christian dot urban at kcl dot ac dot uk> | 
| Tue, 26 Sep 2017 14:07:29 +0100 | |
| changeset 503 | 3b9496db3fb9 | 
| parent 93 | 4794759139ea | 
| permissions | -rw-r--r-- | 
| 92 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | :load matcher.scala | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 4 | // some regular expressions | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 5 | val KEYWORDS =  ALTS(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 6 |   "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn", | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 8 | "fun", "functor", "handle", "if", "in", "include", "infix", "infixr", | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 9 | "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec", | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 10 | "sharing", "sig", "signature", "struct", "structure", "then", "type", | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 11 | "val", "where", "while", "with", "withtype")) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 13 | val DIGITS = RANGE("0123456789")
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 14 | val NONZERODIGITS = RANGE("123456789")
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 16 | val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 17 | val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES)
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 18 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 19 | val ALL = ALTS(KEYWORDS, INTEGERS) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 20 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | val COMMENT = SEQS("/*", NOT(SEGS(STAR(ALL), "*/", STAR(ALL))), "*/")
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 23 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 24 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 25 | val LPAREN = CHAR('(')
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | val RPAREN = CHAR(')')
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | val WHITESPACE = PLUS(RANGE(" \n".toList))
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | val OPS = RANGE("+-*".toList)
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 29 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 30 | // for classifying the strings that have been recognised | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | abstract class Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | case object T_WHITESPACE extends Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | case object T_NUM extends Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 34 | case class T_OP(s: String) extends Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 35 | case object T_LPAREN extends Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 36 | case object T_RPAREN extends Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | case class T_NT(s: String, rhs: List[Token]) extends Token | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 | def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 40 |   tokenize(rs, s.toList).filterNot(_ match {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 41 | case T_WHITESPACE => true | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | case _ => false | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 43 | }) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 44 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 45 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | // lexing rules for arithmetic expressions | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | val lexing_rules: List[Rule[Token]]= | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 | List((NUMBER, (s) => T_NUM), | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | (WHITESPACE, (s) => T_WHITESPACE), | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 51 | (LPAREN, (s) => T_LPAREN), | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | (RPAREN, (s) => T_RPAREN), | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | (OPS, (s) => T_OP(s.mkString))) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | tokenize_file(Nil, "nominal_library.ML") | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 56 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 57 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 58 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 59 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 60 | type Grammar = List[(String, List[Token])] | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 61 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | // grammar for arithmetic expressions | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 63 | val grammar = | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 |   List ("E" -> List(T_NUM),
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 65 |         "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)),
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 |         "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)),
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 67 |         "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)),    
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 68 |         "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN))
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 69 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 70 | def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 | case (_, Nil) => true | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 | case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | case _ => false | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 76 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 78 |   ts1 match {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 79 | case Nil => None | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 80 | case t::ts => | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 82 | else chop(ts, prefix, t::ts2) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 83 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 84 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 85 | // examples | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 | chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 | chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | def replace[A](ts: List[A], out: List[A], in: List [A]) = | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 |   chop(ts, out, Nil) match {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | case None => None | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | case Some((before, after)) => Some(before ::: in ::: after) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 93 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 | def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 |   case List(T_NT("E", tree)) => { println(tree); true }
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 |   case _ => {
 | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 | val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs))) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | tss.flatten.exists(parse1(g, _)) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 100 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 101 | } | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 102 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 103 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 104 | println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1")) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 105 | println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)")) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 106 | println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)")) | 
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 107 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 108 | |
| 
e85600529ca5
moved scala files
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 109 |