# HG changeset patch # User Christian Urban # Date 1371301871 14400 # Node ID e85600529ca504eb12b927121719a3f5543f91b4 # Parent 47f86885d48174820f4a6085ff0f954028d4839d moved scala files diff -r 47f86885d481 -r e85600529ca5 S_grammar-token.scala --- a/S_grammar-token.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -//:load matcher.scala -//:load parser3.scala - -abstract class Token -case object T_ONE extends Token - -val lexing_rules : List[Rule[Token]] = - List(("1", (s: List[Char]) => T_ONE)) - -val T = Tokenizer(lexing_rules) - -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tokparser(t: Token) = TokParser(t) - -case object EmpParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = Set(("", ts)) -} - - -lazy val Su: Parser[List[Token], String] = - (T_ONE ~ Su) ==> { case (x, y) => "1" + y} || EmpParser - - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -def test(i: Int) = { - val result = Su.parse_all(T.fromString("1" * i)) - //print(result.size + " ") -} - - -for (i <- 1 to 1000 by 50) { - print(i + " ") - print("%.5f".format(time_needed(1, test(i)))) - print("\n") -} - diff -r 47f86885d481 -r e85600529ca5 S_grammar.scala --- a/S_grammar.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -//:load parser3.scala - -case class StringParser(s: String) extends Parser[String, String] { - def parse(ts: String) = { - if (s.length <= ts.length && ts.startsWith(s)) Set((s, ts.drop(s.length))) - else Set() - } -} - -implicit def string2parser(s: String) = StringParser(s) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// unambiguous grammar - -lazy val U: Parser[String, String] = - ("1" ~ U) ==> { case (x, y) => "1" + y} || "" - -def test1(i: Int) = { - val result = U.parse_all("1" * i) - //print(result.size + " ") -} - -for (i <- 1 to 1000 by 50) { - print(i + " ") - print("%.5f".format(time_needed(1, test1(i)))) - print("\n") -} - - - -// ambiguous grammar -// n = 16 -> over 35 million parse trees - -lazy val S: Parser[String, String] = - ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "" - -def test2(i: Int) = { - val result = S.parse_all("1" * i) - print(result.size + " ") -} - -for (i <- 1 to 30) { - print(i + " ") - print("%.5f".format(time_needed(1, test2(i)))) - print("\n") -} diff -r 47f86885d481 -r e85600529ca5 Term_grammar.scala --- a/Term_grammar.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -//:load matcher.scala -//:load parser3.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been lexed -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object 
T_RPAREN extends Token - - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -lazy val E: Parser[List[Token], Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T -lazy val T: Parser[List[Token], Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F -lazy val F: Parser[List[Token], Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser - -println(E.parse_all(Tk.fromString("1 + 2 + 3"))) -println(E.parse_all(Tk.fromString("1 + 2 * 3"))) -println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) -println(E.parse_all(Tk.fromString("(14 + 2) * (3 + 2)"))) - diff -r 47f86885d481 -r e85600529ca5 app0.scala --- a/app0.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -import io.Source - -def get_page(url: String) : String = { - Source.fromURL(url).take(10000).mkString - - diff -r 47f86885d481 -r e85600529ca5 app1.scala --- a/app1.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - diff -r 47f86885d481 -r e85600529ca5 app2.scala --- a/app2.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -val http_pattern = """\"https?://[^\"]*\"""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - diff -r 47f86885d481 -r e85600529ca5 app3.scala --- a/app3.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -val my_urls = """urbanc""".r - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else if (my_urls.findFirstIn(url) == None) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} diff -r 47f86885d481 -r e85600529ca5 app4.scala --- a/app4.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -val http_pattern = """\"https?://[^\"]*\"""".r -val my_urls = """urbanc""".r -val email_pattern = - """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - for (u <- get_all_URLs(page)) crawl(u, n - 1) - } -} diff -r 47f86885d481 -r e85600529ca5 app5.scala --- a/app5.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -def nullable (r: Rexp) : Boolean = r 
match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -}
diff -r 47f86885d481 -r e85600529ca5 app51.scala --- a/app51.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp
diff -r 47f86885d481 -r e85600529ca5 app6.scala --- a/app6.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ -def deriv (r: Rexp, c: Char) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) - else SEQ(deriv(r1, c), r2) - case STAR(r) => SEQ(deriv(r, c), STAR(r)) -} -
diff -r 47f86885d481 -r e85600529ca5 app7.scala --- a/app7.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ -abstract class Parser[I, T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) - yield head - - def || (right : => Parser[I, T]) : Parser[I, T] = - new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = - new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = - new SeqParser(this, right) -} - -
diff -r 47f86885d481 -r e85600529ca5 app8.scala --- a/app8.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S]) - extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) - yield ((head1, head2), tail2) -} - -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T]) - extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I, T, S](p: => Parser[I, T], f: T => S) - extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) - yield (f(head), tail) -} - -
diff -r 47f86885d481 -r e85600529ca5 automata.scala --- a/automata.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - -// a class for deterministic finite automata, -// the type of states is kept polymorphic - -case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { - - // the transition function lifted to lists of characters - def deltas(q: A, cs: List[Char]) : Either[A, String] = - if (states.contains(q)) cs match { - case Nil => Left(q) - case c::cs => - if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) - else Right(q + " does not have a transition for " + c) - } - else Right(q + " is not a state of the automaton") - - // whether a string is accepted by the automaton - def accepts(s: String) = deltas(start, s.toList) match { - case Left(q) => fins.contains(q) - case _ => false - } -} - - -// translating a regular expression into a finite -// automaton - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -implicit def string2rexp(s : String) = { - def chars2rexp (cs: List[Char]) : Rexp = cs match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::cs => SEQ(CHAR(c), chars2rexp(cs)) - } - chars2rexp(s.toList) -} - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -def der (r: Rexp, c: Char) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) - case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) - else SEQ(der(r1, c), r2) - case STAR(r) => SEQ(der(r, c), STAR(r)) -} - - -// Here we construct an automaton whose -// states are regular expressions -type State = Rexp -type States = Set[State] -type Transition = Map[(State, Char), State] - -// we use as an alphabet all lowercase letters -val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet - -def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { - val q_der : State = der(q, c) - if (qs.contains(q_der)) (qs, delta + ((q, c) -> q_der)) - else explore(qs + q_der, delta + ((q, c) -> q_der), q_der) -} - -def explore (qs: States, delta: Transition, q: State) : (States, Transition) = - alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) - - -def mk_automaton (r: Rexp) : Automaton[Rexp] = { - val (qs, delta) = explore(Set(r), Map(), r); - val fins = for (q <- qs if nullable(q)) yield q; - Automaton[Rexp](r, qs, delta, fins) -} - -val A = mk_automaton(ALT("ab","ac")) - -A.start -A.states.toList.length - -println(A.accepts("bd")) -println(A.accepts("ab")) -println(A.accepts("ac")) - -val r1 = STAR(ALT("a","b")) -val r2 = SEQ("b","b") -val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a") -val B = mk_automaton(r3) - -B.start -B.states.toList.length
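The Automaton class above keeps the state type polymorphic, so machines can also be built by hand instead of via mk_automaton. A minimal sketch of that use (the Int state names, the transition table and the test strings are invented for illustration; it assumes only the Automaton definition above):

// a hand-built two-state DFA over {a, b} that accepts strings ending in 'a';
// states are plain Ints here, which shows why the state type is polymorphic
val D = Automaton[Int](0, Set(0, 1),
  Map((0, 'a') -> 1, (0, 'b') -> 0,
      (1, 'a') -> 1, (1, 'b') -> 0), Set(1))

println(D.accepts("abb"))   // false: ends in 'b', stuck in state 0
println(D.accepts("abba"))  // true:  ends in 'a', reaches final state 1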
diff -r 47f86885d481 -r e85600529ca5 compile.scala --- a/compile.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,326 +0,0 @@ -// A parser and evaluator for the while language - // -import matcher._ -import parser._ - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" -val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt -case class Write(s: String) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Relop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (T_KWD("true") ==> ((_) => True: BExp)) || - (T_KWD("false") ==> ((_) => False: BExp)) || - (T_LPAREN ~> BExp <~ T_RPAREN) || - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || - (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - -// compiler -val beginning = """ -.class public XXX.XXX -.super java/lang/Object - -.method public <init>()V - aload_0 - invokenonvirtual java/lang/Object/<init>()V - return -.end method - -.method public static write(I)V - .limit locals 5 - .limit stack 5 - iload 0 - getstatic
java/lang/System/out Ljava/io/PrintStream; - swap - invokevirtual java/io/PrintStream/println(I)V - return -.end method - - -.method public static main([Ljava/lang/String;)V - .limit locals 200 - .limit stack 200 - -""" - -val ending = """ - - return - -.end method -""" - -// for generating new labels -var counter = -1 - -def Fresh(x: String) = { - counter += 1 - x ++ "_" ++ counter.toString() -} - -type Env = Map[String, String] -type Instrs = List[String] - -def compile_aexp(a: AExp, env : Env) : Instrs = a match { - case Num(i) => List("ldc " + i.toString + "\n") - case Var(s) => List("iload " + env(s) + "\n") - case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") - case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") - case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") -} - -def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { - case True => Nil - case False => List("goto " + jmp + "\n") - case Relop("=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") - case Relop("!=", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") - case Relop("<", a1, a2) => - compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") -} - - -def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { - case Skip => (Nil, env) - case Assign(x, a) => { - val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString - (compile_aexp(a, env) ++ - List("istore " + index + "\n"), env + (x -> index)) - } - case If(b, bl1, bl2) => { - val if_else = Fresh("If_else") - val if_end = Fresh("If_end") - val (instrs1, env1) = compile_bl(bl1, env) - val (instrs2, env2) = compile_bl(bl2, env1) - (compile_bexp(b, env, if_else) ++ - instrs1 ++ - List("goto " + if_end + "\n") ++ - List("\n" + if_else + ":\n\n") ++ - instrs2 ++ - List("\n" + if_end + ":\n\n"), env2) - } - case While(b, bl) => { - val loop_begin = Fresh("Loop_begin") - val loop_end = Fresh("Loop_end") - val (instrs1, env1) = compile_bl(bl, env) - (List("\n" + loop_begin + ":\n\n") ++ - compile_bexp(b, env, loop_end) ++ - instrs1 ++ - List("goto " + loop_begin + "\n") ++ - List("\n" + loop_end + ":\n\n"), env1) - } - case Write(x) => - (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) -} - -def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { - case Nil => (Nil, env) - case s::bl => { - val (instrs1, env1) = compile_stmt(s, env) - val (instrs2, env2) = compile_bl(bl, env1) - (instrs1 ++ instrs2, env2) - } -} - -def compile(input: String) : String = { - val class_name = input.split('.')(0) - val tks = Tok.fromFile(input) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) -} - - -def compile_to(input: String, output: String) = { - val fw = new java.io.FileWriter(output) - fw.write(compile(input)) - fw.close() -} - -// -val tks = Tok.fromString("x := x + 1") -val ast = Stmt.parse_single(tks) -println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) - - - -//examples - -compile_to("loops.while", "loops.j") -//compile_to("fib.while", "fib.j") - - -// testing cases for time measurements - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -// for 
testing -import scala.sys.process._ - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -}; -write x; -write y; -write z -""" - - -def compile_test(n: Int) : Unit = { - val class_name = "LOOP" - val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) - val ast = Stmts.parse_single(tks) - val instructions = compile_bl(ast, Map.empty)._1 - val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) - val fw = new java.io.FileWriter(class_name + ".j") - fw.write(assembly) - fw.close() - val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! - println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) -} - -List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) - - - -// Java bytecode assembler -// -// java -jar jvm/jasmin-2.4/jasmin.jar loops.j - - - - - -
diff -r 47f86885d481 -r e85600529ca5 crawler.scala --- a/crawler.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,144 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - -// non-existing page -> returns the empty string -get_page("""http://www.foobar.com""") - - -// starting URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" - -// starts with an " -// then either http or https -// then :// -// then any character that is not " -// finally " -val http_pattern = """\"((?:http|https)://(?:[^\"])*)\"""".r -val http_pattern = """\"(https?://[^\"]*)\"""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// get all urls in startURL -get_all_URLs(get_page(startURL)) - -// number of all urls in startURL -get_all_URLs(get_page(startURL)).toList.length - - -// naive version - searches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - -crawl(startURL, 2) - - -//breadth-first version without visiting -//pages twice -def bf_crawl(todo: Set[String], visited: Set[String], n: Int) : Unit = { - if (n == 0) () - else { - val new_todo = todo.flatMap { - url => { - if (visited.contains(url)) Set[String]() - else { - println("Visiting: " + n + " " + url) - get_all_URLs(get_page(url)) - } - } - } - bf_crawl(new_todo, visited union todo, n - 1) - } -} - -bf_crawl(Set(startURL), Set(), 2) - - -//breadth-first version without visiting -//pages twice and only in "my" domain -val my_pattern = """urbanc""".r - -// breadth first search avoiding double searches -def bf_crawl2(todo: Set[String], visited: Set[String], n: Int) : Unit = { - if (n == 0) () - else { - val new_todo = todo.flatMap { - url => { - if (visited.contains(url)) Set[String]() - else if (my_pattern.findFirstIn(url) == None) Set[String]() - else { - println("Visiting: " + n + " " + url); - get_all_URLs(get_page(url)) - } - } - } - bf_crawl2(new_todo, visited union todo, n - 1) - } -} - -bf_crawl2(Set(startURL), Set(), 5)
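The domain filter in bf_crawl2 hinges on findFirstIn returning an Option. A tiny sketch of the intended behaviour (the second URL is made up for illustration; it assumes my_pattern as defined above):

println(my_pattern.findFirstIn("http://www.inf.kcl.ac.uk/staff/urbanc/")) // Some(urbanc) -> crawled
println(my_pattern.findFirstIn("http://www.example.com/"))                // None -> skipped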
-// email harvester -// from -// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ - -val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r - -def bf_crawl3(todo: Set[String], visited: Set[String], n: Int) : Unit = { - if (n == 0) () - else { - val new_todo = todo.flatMap { - url => { - if (visited.contains(url)) Set[String]() - else { - println("Visiting: " + n + " " + url); - val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - get_all_URLs(get_page(url)) - } - } - } - bf_crawl3(new_todo, visited union todo, n - 1) - } -} - -bf_crawl3(Set(startURL), Set(), 3) - - -// depth-first version does not work, -// because it might visit pages at depth 1 -// while it still wants to visit them at -// depth 2 -var visited = Set("") - -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else if (visited.contains(url)) () //println("Already visited: " + n + " " + url) - else { - println("Visiting: " + n + " " + url); - visited += url - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1); - } -}
diff -r 47f86885d481 -r e85600529ca5 crawler1.scala --- a/crawler1.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - - -// regex for URLs -val http_pattern = """\"https?://[^\"]*\"""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// naive version - searches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - -// starting URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" -//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/""" - - -// call on the command line -crawl(startURL, 2) - -crawl("""http://www.dcs.kcl.ac.uk/staff/urbanc/msc-projects-12.html""", 2)
diff -r 47f86885d481 -r e85600529ca5 crawler2.scala --- a/crawler2.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - -// starting URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" - -// regex for URLs -val http_pattern = """\"https?://[^\"]*\"""".r -val my_urls = """urbanc""".r - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// naive version - searches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - else if (my_urls.findFirstIn(url) == None) () - else { - println("Visiting: " + n + " " + url) - for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) - } -} - -// can now deal with depth 3 -// start on command line -crawl(startURL, 4)
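How http_pattern, findAllIn and unquote fit together is easiest to see on a tiny input. A small sketch (the HTML snippet is invented for illustration; it assumes the definitions above):

val snippet = """<a href="http://example.com/a">x</a> and <a href="https://example.com/b">y</a>"""
// findAllIn yields the still-quoted matches; unquote strips the surrounding " characters
println(get_all_URLs(snippet)) // expected: Set(http://example.com/a, https://example.com/b)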
diff -r 47f86885d481 -r e85600529ca5 crawler3.scala --- a/crawler3.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -import io.Source -import scala.util.matching.Regex - -// gets the first ~10K of a page -def get_page(url: String) : String = { - try { - Source.fromURL(url).take(10000).mkString - } - catch { - case e => { - println(" Problem with: " + url) - "" - } - } -} - -// starting URL for the crawler -val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" - -// regex for URLs -val http_pattern = """\"https?://[^\"]*\"""".r -val my_urls = """urbanc""".r -val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r - -// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ - -def unquote(s: String) = s.drop(1).dropRight(1) - -def get_all_URLs(page: String) : Set[String] = { - (http_pattern.findAllIn(page)).map { unquote(_) }.toSet -} - -// naive version - searches until a given depth -// visits pages potentially more than once -def crawl(url: String, n: Int) : Unit = { - if (n == 0) () - //else if (my_urls.findFirstIn(url) == None) () - else { - println("Visiting: " + n + " " + url) - val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - for (u <- get_all_URLs(page)) crawl(u, n - 1) - } -} - -// can now deal with depth 3 -// start on command line -crawl(startURL, 3) -
diff -r 47f86885d481 -r e85600529ca5 html.scala --- a/html.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ - -//:load matcher.scala - -// some regular expressions -val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""") -val WORD = PLUS(SYM) - -val BTAG = SEQS("<", WORD, ">") -val ETAG = SEQS("</", WORD, ">") - -val WHITESPACE = PLUS(RANGE(" \n")) - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case class T_WORD(s: String) extends Token -case class T_ETAG(s: String) extends Token -case class T_BTAG(s: String) extends Token -case class T_NT(s: String, rhs: List[Token]) extends Token - -val lexing_rules: List[Rule[Token]] = - List((BTAG, (s) => T_BTAG(s.mkString)), - (ETAG, (s) => T_ETAG(s.mkString)), - (WORD, (s) => T_WORD(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE)) - -// the tokenizer -val T = Tokenizer(lexing_rules) - -// width for printing -val WIDTH = 60 - - -def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match { - case Nil => println(Console.RESET) - case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr) - case T_WORD(s)::rest => { - val newstr = Console.RESET + ctr.reverse.mkString + s - if (c + s.length < WIDTH) { - print(newstr); - interpret(rest, c + s.length, ctr) - } - else { - print("\n" + newstr) - interpret(rest, s.length, ctr) - } - } - case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr) - case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr) - case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr) - case T_BTAG("<u>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) - case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr) - case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr) - case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr) - case T_ETAG(_)::rest => interpret(rest, c, ctr.tail) - case _::rest => interpret(rest, c, ctr) -} - -val test_string = """ -<b>MSc Projects</b> - -<p> -start of paragraph. <cyan>a cyan word</cyan> normal again something longer. -</p> - - -<p>Description: - Regular expressions are extremely useful for many text-processing tasks such as finding patterns in texts, - lexing programs, syntax highlighting and so on. Given that regular expressions were - introduced in 1950 by Stephen Kleene, you might think - regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still - an active research area. For example - this paper - about regular expression matching and partial derivatives was presented this summer at the international - PPDP'12 conference. The task in this project is to implement the results from this paper.</p> - -<p>The background for this project is that some regular expressions are - evil - and can stab you in the back; according to - this blog post. - For example, if you use in Python or - in Ruby (probably also in other mainstream programming languages) the - innocently looking regular expression a?{28}a{28} and match it, say, against the string - aaaaaaaaaaaaaaaaaaaaaaaaaaaa (that is 28 as), you will soon notice that your CPU usage goes to 100%. In fact, - Python and Ruby need approximately 30 seconds of hard work for matching this string. You can try it for yourself: - re.py (Python version) and - re.rb - (Ruby version). You can imagine an attacker - mounting a nice DoS attack against - your program if it contains such an evil regular expression. Actually - Scala (and also Java) are almost immune from such - attacks as they can deal with strings of up to 4,300 as in less than a second. But if you scale - the regular expression and string further to, say, 4,600 as, then you get a - StackOverflowError - potentially crashing your program. -</p> -""" - -interpret(T.fromString(test_string), 0, Nil)
diff -r 47f86885d481 -r e85600529ca5 matcher.scala --- a/matcher.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -package object matcher { - -// regular expressions -// including constructors for NOT and ALLC -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case object ALLC extends Rexp // recognises any character -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp // negation of a regular expression - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case ALLC => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case ALLC => EMPTY - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// main class for the tokenizer -case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) { - -def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - -def one_token(s: List[Char]) : Either[(List[Char], T), String] = { - val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten - if (somes == Nil) Right(s.mkString) - else Left(somes sortBy (_._1.length) head) -} - -def tokenize(cs: List[Char]) : List[T] = cs match { - case Nil => Nil - case _ => one_token(cs) match { - case Left((rest, token)) => token :: tokenize(rest) - case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } - } -} - -def fromString(s: String) : List[T] = - tokenize(s.toList).filterNot(excl.contains(_)) - -def fromFile(name: String) : List[T] = - fromString(io.Source.fromFile(name).mkString) - -} - - -// regular expression for specifying -// ranges of characters -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// many alternatives -def Alts(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => ALT(r, Alts(rs)) -} -def ALTS(rs: Rexp*) =
Alts(rs.toList) - -// repetitions -def Seqs(rs: List[Rexp]) : Rexp = rs match { - case Nil => NULL - case r::Nil => r - case r::rs => SEQ(r, Seqs(rs)) -} -def SEQS(rs: Rexp*) = Seqs(rs.toList) - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -} diff -r 47f86885d481 -r e85600529ca5 parser1.scala --- a/parser1.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ -// A naive bottom-up parser with backtracking -// -// Needs: -// :load matcher.scala - -// some regular expressions -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised - -abstract class Token -case object T_WHITESPACE extends Token -case object T_NUM extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case class NT(s: String) extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -type Grammar = List[(String, List[Token])] - -// grammar for arithmetic expressions -val grammar = - List ("F" -> List(T_NUM), - "E" -> List(T_NUM), - "E" -> List(NT("E"), T_OP("+"), NT("E")), - "E" -> List(NT("E"), T_OP("-"), NT("E")), - "E" -> List(NT("E"), T_OP("*"), NT("E")), - "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) - - -def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = - ts1 match { - case Nil => None - case t::ts => - if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) - else chop(ts, prefix, t::ts2) - } - -// examples for chop -chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) -chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) - -def replace[A](ts: List[A], out: List[A], in: List [A]) = - chop(ts, out, Nil) match { - case None => None - case Some((before, after)) => Some(before ::: in ::: after) - } - -def parse(g: Grammar, ts: List[Token]) : Boolean = { - println(ts) - if (ts == List(NT("E"))) true - else { - val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) - tss.flatten.exists(parse(g, _)) - } -} - -def parser(g: Grammar, s: String) = { - println("\n") - parse(g, Tok.fromString(s)) -} - - - -parser(grammar, "2 + 3 * 4 + 1") -parser(grammar, "(2 + 3) * (4 + 1)") -parser(grammar, "(2 + 3) * 4 (4 + 1)") - - - diff -r 47f86885d481 -r e85600529ca5 parser2.scala --- a/parser2.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -// A naive version of parser combinators producing parse trees -// -// Needs -// :load matcher.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// 
for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_IF extends Token -case object T_THEN extends Token -case object T_ELSE extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List(("if", (s) => T_IF), - ("then", (s) => T_THEN), - ("else", (s) => T_ELSE), - (NUMBER, (s) => T_NUM(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -// parse trees -abstract class ParseTree -case class Leaf(t: Token) extends ParseTree -case class Branch(pts: List[ParseTree]) extends ParseTree - -def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match { - case Leaf(t) => Branch(List(Leaf(t), pt2)) - case Branch(pts) => Branch(pts ++ List(pt2)) -} - -// parser combinators -abstract class Parser { - def parse(ts: List[Token]): Set[(ParseTree, List[Token])] - - def parse_all(ts: List[Token]) : Set[ParseTree] = - for ((head, tail) <- parse(ts); if (tail == Nil)) yield head - - def || (right : => Parser) : Parser = new AltParser(this, right) - def ~ (right : => Parser) : Parser = new SeqParser(this, right) -} - -class AltParser(p: => Parser, q: => Parser) extends Parser { - def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts) -} - -class SeqParser(p: => Parser, q: => Parser) extends Parser { - def parse(ts: List[Token]) = - for ((head1, tail1) <- p.parse(ts); - (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2) -} - -class ListParser(ps: => List[Parser]) extends Parser { - def parse(ts: List[Token]) = ps match { - case Nil => Set() - case p::Nil => p.parse(ts) - case p::ps => - for ((head1, tail1) <- p.parse(ts); - (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2) - } -} - -case class TokParser(tok: Token) extends Parser { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((Leaf(t), ts)) - case _ => Set () - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -case object IdParser extends Parser { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts)) - case _ => Set () - } -} - -case object NumParser extends Parser { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts)) - case _ => Set () - } -} - -lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol -lazy val T: Parser = (F ~ T_OP("*") ~ T) || F -lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser - -println(Tok.fromString("1 + 2 + 3")) -println(E.parse_all(Tok.fromString("1 + 2 + 3"))) - -def eval(t: ParseTree) : Int = t match { - case Leaf(T_NUM(n)) => n.toInt - case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2) - case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2) - case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t) -} - -(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_)) -(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_)) - -lazy val EXPR: Parser = - new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || - new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || - IdParser - -println(EXPR.parse_all(Tok.fromString("if 
a then b else c"))) -println(EXPR.parse_all(Tok.fromString("if a then if x then y else c"))) - - - -
diff -r 47f86885d481 -r e85600529ca5 parser2a.scala --- a/parser2a.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -// Parser combinators including semantic actions -// parses lists of tokens -// -// Needs -// :load matcher.scala - -// some regular expressions -val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") -val ID = PLUS(LETTER) - -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") - -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') - -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_IF extends Token -case object T_THEN extends Token -case object T_ELSE extends Token - -// lexing rules for arithmetic expressions -val lexing_rules: List[Rule[Token]]= - List(("if", (s) => T_IF), - ("then", (s) => T_THEN), - ("else", (s) => T_ELSE), - (NUMBER, (s) => T_NUM(s.mkString)), - (ID, (s) => T_ID(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -// parser combinators with return type T -abstract class Parser[T] { - def parse(ts: List[Token]): Set[(T, List[Token])] - - def parse_all(ts: List[Token]) : Set[T] = - for ((head, tail) <- parse(ts); if (tail == Nil)) yield head - - def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) - def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) - def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) - -} - -class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { - def parse(sb: List[Token]) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { - def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { - def parse (sb: List[Token]) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - - -case class TokParser(tok: Token) extends Parser[Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T -lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F -lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser - -println(E.parse_all(Tok.fromString("1 + 2 + 3"))) -println(E.parse_all(Tok.fromString("1 + 2 * 3"))) -println(E.parse_all(Tok.fromString("(1 + 2) * 3"))) - -// Exercise: implement minus
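One way to tackle the exercise, sketched rather than definitive: the lexer already produces T_OP("-") tokens (OPS includes '-'), so only the grammar needs an extra alternative mirroring the '+' case:

lazy val E: Parser[Int] =
  (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } ||
  (T ~ T_OP("-") ~ E) ==> { case ((x, y), z) => x - z } || T

Note the right recursion: with this rule "1 - 2 - 3" is bracketed as 1 - (2 - 3), so a properly left-associative minus needs a different grammar. The tests below exercise the new case: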
-println(E.parse_all(Tok.fromString("(1 - 2) * 3"))) -println(E.parse_all(Tok.fromString("(1 + 2) * - 3"))) diff -r 47f86885d481 -r e85600529ca5 parser3.scala --- a/parser3.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -package object parser { - -// parser combinators -// with input type I and return type T -// -// needs to be compiled with scalac parser3.scala - -abstract class Parser[I <% Seq[_], T] { - def parse(ts: I): Set[(T, I)] - - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head - - def parse_single(ts: I) : T = parse_all(ts).toList match { - case t::Nil => t - case _ => { println ("Parse Error") ; sys.exit(-1) } - } - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) -} - -class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) -} - -class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) -} - -} diff -r 47f86885d481 -r e85600529ca5 parser4.scala --- a/parser4.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ - -// parser combinators with input type I and return type T - -case class SubString(s: String, l: Int, h: Int) { - def low = l - def high = h - def length = h - l - def substring(l: Int = l, h: Int = h) = s.slice(l, h) - def set(low: Int = l, high: Int = h) = SubString(s, low, high) - -} - -type Ctxt = List[(String, SubString)] - -abstract class Parser[T] { - - def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] - - def parse_all(s: String) : Set[T] = - for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head - - def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) - def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) -} - -class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { - def parse(sb: SubString, ctxt: Ctxt) = - for ((head1, tail1) <- p.parse(sb, ctxt); - (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) -} - -class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) -} - -class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { - def parse(sb: SubString, ctxt: Ctxt) = - for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) -} - -case class SubStringParser(s: String) extends Parser[SubString] { - val n = s.length - def parse(sb: SubString, ctxt: Ctxt) = { - if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) - Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) - else Set() - } -} - -implicit def string2parser(s: String) = 
SubStringParser(s) ==> (_.substring()) - -class IgnLst[T](p: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = { - if (sb.length == 0) Set() - else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) - yield (head, tail.set(high = tail.high + 1)) - } -} - -class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { - def parse(sb: SubString, ctxt: Ctxt) = { - val should_trim = ctxt.contains (nt, sb) - if (should_trim && sb.length == 0) Set() - else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) - else p.parse(sb, (nt, sb)::ctxt) - } -} - -// ambiguous grammar -lazy val E: Parser[Int] = - new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || - (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || - ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || - "0" ==> { (s) => 0 } || - "1" ==> { (s) => 1 } || - "2" ==> { (s) => 2 } || - "3" ==> { (s) => 3 }) - -println(E.parse_all("1+2*3+3")) - -
diff -r 47f86885d481 -r e85600529ca5 parser5.scala --- a/parser5.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -val DIGIT = RANGE("0123456789") -val NONZERODIGIT = RANGE("123456789") - -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val LPAREN = CHAR('(') -val RPAREN = CHAR(')') -val WHITESPACE = PLUS(RANGE(" \n")) -val OPS = RANGE("+-*") - -// for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case class T_NUM(s: String) extends Token -case class T_OP(s: String) extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token - -val lexing_rules: List[Rule[Token]]= - List((NUMBER, (s) => T_NUM(s.mkString)), - (WHITESPACE, (s) => T_WHITESPACE), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (OPS, (s) => T_OP(s.mkString))) - -val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) - - -// parser combinators with input type I and return type T -// and memoisation - -case class SubList[T](s: List[T], l: Int, h: Int) { - def low = l - def high = h - def length = h - l - def sublist(l: Int = l, h: Int = h) = s.slice(l, h) - def set(low: Int = l, high: Int = h) = SubList(s, low, high) -} - -type Ctxt[T] = List[(String, SubList[T])] - -abstract class Parser[I, T] { - - def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] - - def parse_all(s: List[I]) : Set[T] = - for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head - - def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) - def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) - def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) - def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) - def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) -} - -class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = - for ((head1, tail1) <- p.parse(sb, ctxt); - (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) -} - -class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) -} - -class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = - for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) -} - -case object NumParser extends
Parser[Token, Int] { - def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { - if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { - case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) - case _ => Set() - } - else Set() - } -} - -case class TokParser(t: Token) extends Parser[Token, Token] { - def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { - if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) - else Set() - } -} - -implicit def token2tparser(t: Token) = TokParser(t) - -class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = { - if (sb.length == 0) Set() - else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) - yield (head, tail.set(high = tail.high + 1)) - } -} - -class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: SubList[I], ctxt: Ctxt[I]) = { - val should_trim = ctxt.contains (nt, sb) - if (should_trim && sb.length == 0) Set() - else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) - else p.parse(sb, (nt, sb)::ctxt) - } -} - -lazy val E: Parser[Token, Int] = - new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || - (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} || - (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} || - NumParser) - -println(E.parse_all(Tk.fromString("1 + 2 * 3"))) -println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) diff -r 47f86885d481 -r e85600529ca5 re-alt.scala --- a/re-alt.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -trait RegExp { - def nullable: Boolean - def derive(c: Char): RegExp -} - -case object Empty extends RegExp { - def nullable = false - def derive(c: Char) = Empty -} - -case object Eps extends RegExp { - def nullable = true - def derive(c: Char) = Empty -} - -case class Str(s: String) extends RegExp { - def nullable = s.isEmpty - def derive(c: Char) = - if (s.isEmpty || s.head != c) Empty - else Str(s.tail) -} - -case class Cat(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable && s.nullable - def derive(c: Char) = - if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c)) - else Cat(r.derive(c), s) -} - -case class Star(r: RegExp) extends RegExp { - def nullable = true - def derive(c: Char) = Cat(r.derive(c), this) -} - -case class Or(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable || s.nullable - def derive(c: Char) = Or(r.derive(c), s.derive(c)) -} - -case class And(r: RegExp, s: RegExp) extends RegExp { - def nullable = r.nullable && s.nullable - def derive(c: Char) = And(r.derive(c), s.derive(c)) -} - -case class Not(r: RegExp) extends RegExp { - def nullable = !r.nullable - def derive(c: Char) = Not(r.derive(c)) -} - - - - -object Matcher { - def matches(r: RegExp, s: String): Boolean = { - if (s.isEmpty) r.nullable - else matches(r.derive(s.head), s.tail) - } -} - - -object Pimps { - implicit def string2RegExp(s: String) = Str(s) - - implicit def regExpOps(r: RegExp) = new { - def | (s: RegExp) = Or(r, s) - def & (s: RegExp) = And(r, s) - def % = Star(r) - def %(n: Int) = rep(r, n) - def ? = Or(Eps, r) - def ! = Not(r) - def ++ (s: RegExp) = Cat(r, s) - def ~ (s: String) = Matcher.matches(r, s) - } - - implicit def stringOps(s: String) = new { - def | (r: RegExp) = Or(s, r) - def | (r: String) = Or(s, r) - def & (r: RegExp) = And(s, r) - def & (r: String) = And(s, r) - def % = Star(s) - def % (n: Int) = rep(Str(s), n) - def ? = Or(Eps, s) - def ! 
= Not(s) - def ++ (r: RegExp) = Cat(s, r) - def ++ (r: String) = Cat(s, r) - def ~ (t: String) = Matcher.matches(s, t) - } - - def rep(r: RegExp, n: Int): RegExp = - if (n <= 0) Star(r) - else Cat(r, rep(r, n - 1)) -} - - -object Test { - import Pimps._ - - val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" - val int = ("+" | "-").? ++ digit.%(1) - val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).? - - def main(args: Array[String]) { - val ints = List("0", "-4534", "+049", "99") - val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") - val errs = List("", "-", "+", "+-1", "-+2", "2-") - - ints.foreach(s => assert(int ~ s)) - reals.foreach(s => assert(!(int ~ s))) - errs.foreach(s => assert(!(int ~ s))) - - ints.foreach(s => assert(real ~ s)) - reals.foreach(s => assert(real ~ s)) - errs.foreach(s => assert(!(real ~ s))) - } -} diff -r 47f86885d481 -r e85600529ca5 re-internal.scala --- a/re-internal.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ - -// measures the time a function needs -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 10001 by 300) { - val re = ("((a?){" + i + "})(a{" + i + "})") - println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re)))) -} - - - diff -r 47f86885d481 -r e85600529ca5 re0.scala --- a/re0.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp -case class REP(r: Rexp, n: Int) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def %(n: Int) = REP(r, n) - def %%(n: Int) = SEQ(REP(r, n), STAR(r)) - def ? = ALT(EMPTY, r) - def unary_! = NOT(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def %(n: Int) = REP(s, n) - def %%(n: Int) = SEQ(REP(s, n), STAR(s)) - def ? = ALT(EMPTY, s) - def unary_! = NOT(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) - case REP(r, i) => if (i == 0) true else nullable(r) -} - -// derivative of a regular expression w.r.t. 
a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) - case REP(r, i) => - if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) -} - -// derivative w.r.t. a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - -//examples -val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" -val int = ("+" | "-").? ~ digit.%%(1) -val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).? - -val ints = List("0", "-4534", "+049", "99") -val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") -val errs = List("", "-", "+", "+-1", "-+2", "2-") - -ints.map(s => matcher(int, s)) -reals.map(s => matcher(int, s)) -errs.map(s => matcher(int, s)) - -ints.map(s => matcher(real, s)) -reals.map(s => matcher(real, s)) -errs.map(s => matcher(real, s)) - - - -def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 29) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r 47f86885d481 -r e85600529ca5 re1.scala --- a/re1.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ - -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -// derivative w.r.t. 
a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - -//example -//val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) -//der('b', r) -//der('b', r) - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => EMPTY - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 29) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r 47f86885d481 -r e85600529ca5 re2.scala --- a/re2.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ - -abstract class Rexp { - def simp : Rexp = this -} - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, r) => r - case (r, NULL) => r - case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) - case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) - case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) - } -} -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r) => r - case (r, EMPTY) => r - case (r1, r2) => SEQ(r1, r2) - } -} -case class STAR(r: Rexp) extends Rexp { - override def simp = r.simp match { - case NULL => EMPTY - case EMPTY => EMPTY - case r => STAR(r) - } -} - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -// derivative w.r.t. 
a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r).simp) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -//n-times -def NTIMES(r: Rexp, n: Int) : Rexp = n match { - case 0 => EMPTY - case 1 => r - case n => SEQ(r, NTIMES(r, n - 1)) -} - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - -for (i <- 1 to 100) { - println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) -} - - diff -r 47f86885d481 -r e85600529ca5 re3.scala --- a/re3.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ - -abstract class Rexp { - def simp : Rexp = this -} - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, r) => r - case (r, NULL) => r - case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) - case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) - case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) - } -} -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { - override def simp = (r1.simp, r2.simp) match { - case (NULL, _) => NULL - case (_, NULL) => NULL - case (EMPTY, r) => r - case (r, EMPTY) => r - case (r1, r2) => SEQ(r1, r2) - } -} -case class STAR(r: Rexp) extends Rexp { - override def simp = r.simp match { - case NULL => EMPTY - case EMPTY => EMPTY - case r => STAR(r) - } -} -case class NTIMES(r: Rexp, n: Int) extends Rexp { - override def simp = if (n == 0) EMPTY else - r.simp match { - case NULL => NULL - case EMPTY => EMPTY - case r => NTIMES(r, n) - } -} - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NTIMES(r, i) => if (i == 0) true else nullable(r) -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NTIMES(r, i) => - if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) -} - -// derivative w.r.t. 
a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r).simp) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - - -//one or zero -def OPT(r: Rexp) = ALT(r, EMPTY) - -def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) - -def time_needed[T](i: Int, code: => T) = { - val start = System.nanoTime() - for (j <- 1 to i) code - val end = System.nanoTime() - (end - start)/(i * 1.0e9) -} - - -for (i <- 1 to 11001 by 500) { - println(i + " " + time_needed(1, matcher(RTEST(i), "a" * i))) -} - - diff -r 47f86885d481 -r e85600529ca5 regexp.scala --- a/regexp.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ -// regular expressions -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// for example -println(STAR("abc")) - -// produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY)))) - - - -// a simple-minded regular expression matcher: -// it loops for examples like STAR(EMPTY) with -// strings this regular expression does not match - -def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match { - case (NULL::rs, s) => false - case (EMPTY::rs, s) => smatchers(rs, s) - case (CHAR(c)::rs, Nil) => false - case (CHAR(c)::rs, d::s) => (c == d) && smatchers(rs, s) - case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s) - case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) - case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) - case (Nil, s) => s == Nil -} - -def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList) - -// regular expression: a -println(smatcher(CHAR('a'), "ab")) - -// regular expression: a + (b o c) -println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) - -// regular expression: a + (b o c) -println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) - -// loops for regular expression epsilon* -//println(smatcher(STAR(EMPTY), "a")) - - - -// Regular expression matcher that works properly -//================================================ - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - -// derivative w.r.t. 
a string (iterates der) -def ders (s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, der(c, r)) -} - -// main matcher function -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - -//examples - -println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) -println(matcher(ALT(STAR("a"), STAR("b")), "")) -println(matcher("abc", "")) -println(matcher(STAR(ALT(EMPTY, "a")), "")) -println(matcher(STAR(EMPTY), "a")) -println(matcher("cab","cab")) -println(matcher(STAR("a"),"aaa")) -println(matcher("cab" ,"cab")) -println(matcher(STAR("a"),"aaa")) - - diff -r 47f86885d481 -r e85600529ca5 regexp2.scala --- a/regexp2.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,123 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. 
a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -//one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - - -//some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// an example list of regular expressions -val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) - - -def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) - -def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = - s match { - case Nil if (nullable(r)) => Some(Nil, t) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), s, t ::: List(c)) - } - -def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { - val somes = regs.map { munch(_, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { - case Nil => Nil - case _ => one_string(regs, s) match { - case (rest, s) => s.mkString :: tokenize(regs, rest) - } -} - -//examples -println(tokenize(regs, "if true then then 42 else +".toList)) -println(tokenize(regs, "if+true+then+then+42+else +".toList)) -println(tokenize(regs, "ifff if 34 34".toList)) -println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(regs, "/* if true then */ then 42 else +".toList)) -//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r 47f86885d481 -r e85600529ca5 regexp3.scala --- a/regexp3.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,141 +0,0 @@ - -// regular expressions including NOT -abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp -case class NOT(r: Rexp) extends Rexp - - -// some convenience for typing in regular expressions -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -// nullable function: tests whether the regular -// expression can recognise the empty string -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - 
case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - case NOT(r) => !(nullable(r)) -} - -// tests whether a regular expression -// cannot recognise more -def no_more (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => no_more(r1) && no_more(r2) - case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) - case STAR(_) => false - case NOT(r) => !(no_more(r)) -} - - -// derivative of a regular expression w.r.t. a character -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) - case NOT(r) => NOT(der (c, r)) -} - -// regular expression for specifying -// ranges of characters -def RANGE(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), RANGE(s)) -} - -// one or more -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -// some regular expressions -val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) -val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) -val LETTER = ALT(LOWERCASE, UPPERCASE) -val DIGIT = RANGE("0123456789".toList) -val NONZERODIGIT = RANGE("123456789".toList) - -val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) -val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") -val WHITESPACE = RANGE(" \n".toList) -val WHITESPACES = PLUS(WHITESPACE) - -val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) -val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") - - -// for classifying the strings that have been recognised -abstract class Token - -case object T_WHITESPACE extends Token -case object T_COMMENT extends Token -case class T_IDENT(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(n: Int) extends Token -case class T_KEYWORD(s: String) extends Token - - -// an example list of syntactic rules -type Rule = (Rexp, List[Char] => Token) - -val rules: List[Rule]= - List(("if", (s) => T_KEYWORD(s.mkString)), - ("then", (s) => T_KEYWORD(s.mkString)), - ("else", (s) => T_KEYWORD(s.mkString)), - ("+", (s) => T_OP(s.mkString)), - (IDENT, (s) => T_IDENT(s.mkString)), - (NUMBER, (s) => T_NUM(s.mkString.toInt)), - (WHITESPACES, (s) => T_WHITESPACE), - (COMMENT, (s) => T_COMMENT)) - - -def error (s: String) = throw new IllegalArgumentException ("Cannot tokenize: " + s) - -def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = - s match { - case Nil if (nullable(r)) => Some(Nil, action(t)) - case Nil => None - case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) - case c::s if (no_more(der (c, r))) => None - case c::s => munch(der (c, r), action, s, t ::: List(c)) - } - -def one_token (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { - val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten - if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) -} - -def tokenize (rs: List[Rule], s: List[Char]) : List[Token] = s match { - case Nil => Nil - case _ => one_token(rs, s) match { - case (rest, token) => token :: tokenize(rs, rest) - } -} - -//examples -println(tokenize(rules, "if true then then 42 else 
+".toList)) -println(tokenize(rules, "if+true+then+then+42+else +".toList)) -println(tokenize(rules, "ifff if 34 34".toList)) -println(tokenize(rules, "/*ifff if */ hhjj /*34 */".toList)) -println(tokenize(rules, "/* if true then */ then 42 else +".toList)) -//println(tokenize(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r 47f86885d481 -r e85600529ca5 scala/S_grammar-token.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/S_grammar-token.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,47 @@ +//:load matcher.scala +//:load parser3.scala + +abstract class Token +case object T_ONE extends Token + +val lexing_rules : List[Rule[Token]] = + List(("1", (s: List[Char]) => T_ONE)) + +val T = Tokenizer(lexing_rules) + +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tokparser(t: Token) = TokParser(t) + +case object EmpParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = Set(("", ts)) +} + + +lazy val Su: Parser[List[Token], String] = + (T_ONE ~ Su) ==> { case (x, y) => "1" + y} || EmpParser + + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +def test(i: Int) = { + val result = Su.parse_all(T.fromString("1" * i)) + //print(result.size + " ") +} + + +for (i <- 1 to 1000 by 50) { + print(i + " ") + print("%.5f".format(time_needed(1, test(i)))) + print("\n") +} + diff -r 47f86885d481 -r e85600529ca5 scala/S_grammar.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/S_grammar.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,52 @@ +//:load parser3.scala + +case class StringParser(s: String) extends Parser[String, String] { + def parse(ts: String) = { + if (s.length <= ts.length && ts.startsWith(s)) Set((s, ts.drop(s.length))) + else Set() + } +} + +implicit def string2parser(s: String) = StringParser(s) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// unambiguous grammar + +lazy val U: Parser[String, String] = + ("1" ~ U) ==> { case (x, y) => "1" + y} || "" + +def test1(i: Int) = { + val result = U.parse_all("1" * i) + //print(result.size + " ") +} + +for (i <- 1 to 1000 by 50) { + print(i + " ") + print("%.5f".format(time_needed(1, test1(i)))) + print("\n") +} + + + +// ambiguous grammar +// n = 16 -> over 35 million parse trees + +lazy val S: Parser[String, String] = + ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || "" + +def test2(i: Int) = { + val result = S.parse_all("1" * i) + print(result.size + " ") +} + +for (i <- 1 to 30) { + print(i + " ") + print("%.5f".format(time_needed(1, test2(i)))) + print("\n") +} diff -r 47f86885d481 -r e85600529ca5 scala/Term_grammar.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/Term_grammar.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,62 @@ +//:load matcher.scala +//:load parser3.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been 
lexed +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token + + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +lazy val E: Parser[List[Token], Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T +lazy val T: Parser[List[Token], Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F +lazy val F: Parser[List[Token], Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser + +println(E.parse_all(Tk.fromString("1 + 2 + 3"))) +println(E.parse_all(Tk.fromString("1 + 2 * 3"))) +println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) +println(E.parse_all(Tk.fromString("(14 + 2) * (3 + 2)"))) + diff -r 47f86885d481 -r e85600529ca5 scala/app0.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app0.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,6 @@ +import io.Source + +def get_page(url: String) : String = { + Source.fromURL(url).take(10000).mkString + + diff -r 47f86885d481 -r e85600529ca5 scala/app1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,12 @@ +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + diff -r 47f86885d481 -r e85600529ca5 scala/app2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app2.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,16 @@ +val http_pattern = """\"https?://[^\"]*\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + diff -r 47f86885d481 -r e85600529ca5 scala/app3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app3.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,10 @@ +val my_urls = """urbanc""".r + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} diff -r 47f86885d481 -r e85600529ca5 scala/app4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app4.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,14 @@ +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r +val email_pattern = + """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + val page = get_page(url) + 
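// scan the fetched page for email addresses before following its links + 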
println(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page)) crawl(u, n - 1) + } +} diff -r 47f86885d481 -r e85600529ca5 scala/app5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app5.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,8 @@ +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} diff -r 47f86885d481 -r e85600529ca5 scala/app51.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app51.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,8 @@ +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp diff -r 47f86885d481 -r e85600529ca5 scala/app6.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app6.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,11 @@ +def deriv (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) + else SEQ(deriv(r1, c), r2) + case STAR(r) => SEQ(deriv(r, c), STAR(r)) +} + diff -r 47f86885d481 -r e85600529ca5 scala/app7.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app7.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,16 @@ +abstract class Parser[I, T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) + yield head + + def || (right : => Parser[I, T]) : Parser[I, T] = + new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = + new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = + new SeqParser(this, right) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/app8.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/app8.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,23 @@ +class SeqParser[I, T, S](p: => Parser[I, T], + q: => Parser[I, S]) + extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) + yield ((head1, head2), tail2) +} + +class AltParser[I, T](p: => Parser[I, T], + q: => Parser[I, T]) + extends Parser[I, T] { + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I, T, S](p: => Parser[I, T], f: T => S) + extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) + yield (f(head), tail) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/automata.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/automata.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,106 @@ + +// a class for deterministic finite automata, +// the type of states is kept polymorphic + +case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { + + // the transition function lifted to lists of characters + def deltas(q: A, cs: List[Char]) : Either[A, String] = + if (states.contains(q)) cs match { + case Nil => Left(q) + case c::cs => + if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) + else Right(q + " does not have a transition for " + c) + } + else Right(q + " is not a state of the automaton") + + // whether a string is accepted by the automaton + def 
accepts(s: String) = deltas(start, s.toList) match { + case Left(q) => fins.contains(q) + case _ => false + } +} + + +// translating a regular expression into a finite +// automaton + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +implicit def string2rexp(s : String) = { + def chars2rexp (cs: List[Char]) : Rexp = cs match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::cs => SEQ(CHAR(c), chars2rexp(cs)) + } + chars2rexp(s.toList) +} + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def der (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) + case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) + else SEQ(der(r1, c), r2) + case STAR(r) => SEQ(der(r, c), STAR(r)) +} + + +// Here we construct an automaton whose +// states are regular expressions +type State = Rexp +type States = Set[State] +type Transition = Map[(State, Char), State] + +// we use as an alphabet all lowercase letters +val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet + +def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { + val q_der : State = der(q, c) + if (qs.contains(q_der)) (qs, delta + ((q, c) -> q_der)) + else explore(qs + q_der, delta + ((q, c) -> q_der), q_der) +} + +def explore (qs: States, delta: Transition, q: State) : (States, Transition) = + alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) + + +def mk_automaton (r: Rexp) : Automaton[Rexp] = { + val (qs, delta) = explore(Set(r), Map(), r); + val fins = for (q <- qs if nullable(q)) yield q; + Automaton[Rexp](r, qs, delta, fins) +} + +val A = mk_automaton(ALT("ab","ac")) + +A.start +A.states.toList.length + +println(A.accepts("bd")) +println(A.accepts("ab")) +println(A.accepts("ac")) + +val r1 = STAR(ALT("a","b")) +val r2 = SEQ("b","b") +val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a") +val B = mk_automaton(r3) + +B.start +B.states.toList.length diff -r 47f86885d481 -r e85600529ca5 scala/automata1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/automata1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,95 @@ + +// a class for deterministic finite automata, +// the type of states is kept polymorphic + +case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) { + + // the transition function lifted to lists of characters + def deltas(q: A, cs: List[Char]) : A = + if (states.contains(q)) cs match { + case Nil => q + case c::cs => + if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs) + else throw new RuntimeException(q + " does not have a transition for " + c) + } + else throw new RuntimeException(q + " is not a state of the automaton") + + // whether a string is accepted by the automaton + def accepts(s: String) = + try { + fins.contains(deltas(start, s.toList)) + } catch { + case e:RuntimeException => false + } +} + + + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class 
SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +implicit def string2rexp(s : String) = { + def chars2rexp (cs: List[Char]) : Rexp = cs match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::cs => SEQ(CHAR(c), chars2rexp(cs)) + } + chars2rexp(s.toList) +} + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +def der (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(r1, c), der(r2, c)) + case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c)) + else SEQ(der(r1, c), r2) + case STAR(r) => SEQ(der(r, c), STAR(r)) +} + + +// Here we construct an automaton whose +// states are regular expressions +type State = Rexp +type States = Set[State] +type Transition = Map[(State, Char), State] + +def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = { + val qc : State = der(q, c) + if (qs.contains(qc)) (qs, delta + ((q, c) -> qc)) + else explore(qs + qc, delta + ((q, c) -> qc), qc) +} + +// we use as alphabet all lowercase letters +val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet + +def explore (qs: States, delta: Transition, q: State) : (States, Transition) = + alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2)) + + +def mk_automaton (r: Rexp) : Automaton[Rexp] = { + val (qs, delta) = explore(Set(r), Map(), r); + val fins = for (q <- qs if nullable(q)) yield q; + Automaton[Rexp](r, qs, delta, fins) +} + +val A = mk_automaton(ALT("ab","ac")) + +println(A.accepts("bd")) +println(A.accepts("ab")) +println(A.accepts("ac")) diff -r 47f86885d481 -r e85600529ca5 scala/compile.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/compile.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,326 @@ +// A parser and compiler for the while language +// +import matcher._ +import parser._ + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + 
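// T_WHITESPACE and T_COMMENT are lexed like any other token, but filtered out by the Tokenizer below + 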
(WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Relop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// compiler +val beginning = """ +.class public XXX.XXX +.super java/lang/Object + +.method public <init>()V + aload_0 + invokenonvirtual java/lang/Object/<init>()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 
+ .limit stack 200 + +""" + +val ending = """ + + return + +.end method +""" + +// for generating new labels +var counter = -1 + +def Fresh(x: String) = { + counter += 1 + x ++ "_" ++ counter.toString() +} + +type Env = Map[String, String] +type Instrs = List[String] + +def compile_aexp(a: AExp, env : Env) : Instrs = a match { + case Num(i) => List("ldc " + i.toString + "\n") + case Var(s) => List("iload " + env(s) + "\n") + case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") + case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") + case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") +} + +def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { + case True => Nil + case False => List("goto " + jmp + "\n") + case Relop("=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") + case Relop("!=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") + case Relop("<", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") +} + + +def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { + case Skip => (Nil, env) + case Assign(x, a) => { + val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString + (compile_aexp(a, env) ++ + List("istore " + index + "\n"), env + (x -> index)) + } + case If(b, bl1, bl2) => { + val if_else = Fresh("If_else") + val if_end = Fresh("If_end") + val (instrs1, env1) = compile_bl(bl1, env) + val (instrs2, env2) = compile_bl(bl2, env1) + (compile_bexp(b, env, if_else) ++ + instrs1 ++ + List("goto " + if_end + "\n") ++ + List("\n" + if_else + ":\n\n") ++ + instrs2 ++ + List("\n" + if_end + ":\n\n"), env2) + } + case While(b, bl) => { + val loop_begin = Fresh("Loop_begin") + val loop_end = Fresh("Loop_end") + val (instrs1, env1) = compile_bl(bl, env) + (List("\n" + loop_begin + ":\n\n") ++ + compile_bexp(b, env, loop_end) ++ + instrs1 ++ + List("goto " + loop_begin + "\n") ++ + List("\n" + loop_end + ":\n\n"), env1) + } + case Write(x) => + (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) +} + +def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { + case Nil => (Nil, env) + case s::bl => { + val (instrs1, env1) = compile_stmt(s, env) + val (instrs2, env2) = compile_bl(bl, env1) + (instrs1 ++ instrs2, env2) + } +} + +def compile(input: String) : String = { + val class_name = input.split('.')(0) + val tks = Tok.fromFile(input) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) +} + + +def compile_to(input: String, output: String) = { + val fw = new java.io.FileWriter(output) + fw.write(compile(input)) + fw.close() +} + +// +val tks = Tok.fromString("x := x + 1") +val ast = Stmt.parse_single(tks) +println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) + + + +//examples + +compile_to("loops.while", "loops.j") +//compile_to("fib.while", "fib.j") + + +// testing cases for time measurements + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// for testing +import scala.sys.process._ + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := 
start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z +""" + + +def compile_test(n: Int) : Unit = { + val class_name = "LOOP" + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) + val fw = new java.io.FileWriter(class_name + ".j") + fw.write(assembly) + fw.close() + val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! + println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) +} + +List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) + + + +// Java bytecode assembler +// +// java -jar jvm/jasmin-2.4/jasmin.jar loops.j + + + + + + diff -r 47f86885d481 -r e85600529ca5 scala/crawler.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/crawler.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,144 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// non-existing page -> returns the empty string +get_page("""http://www.foobar.com""") + + +// starting URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// starts with an " +// then either http or https +// then :// +// then any character that is not " +// finally " +val http_pattern = """\"((?:http|https)://(?:[^\"])*)\"""".r +val http_pattern = """\"(https?://[^\"]*)\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// get all urls in startURL +get_all_URLs(get_page(startURL)) + +// number of all urls in startURL +get_all_URLs(get_page(startURL)).toList.length + + +// naive version - searches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +crawl(startURL, 2) + + +//breadth-first version without visiting +//pages twice +def bf_crawl(todo: Set[String], visited: Set[String], n: Int) : Unit = { + if (n == 0) () + else { + val new_todo = todo.flatMap { + url => { + if (visited.contains(url)) Set[String]() + else { + println("Visiting: " + n + " " + url) + get_all_URLs(get_page(url)) + } + } + } + bf_crawl(new_todo, visited union todo, n - 1) + } +} + +bf_crawl(Set(startURL), Set(), 2) + + +//breadth-first version without visiting +//pages twice and only in "my" domain +val my_pattern = """urbanc""".r + +// breadth first search avoiding double searches +def bf_crawl2(todo: Set[String], visited: Set[String], n: Int) : Unit = { + if (n == 0) () + else { + val new_todo = todo.flatMap { + url => { + if (visited.contains(url)) Set[String]() + else if (my_pattern.findFirstIn(url) == None) Set[String]() + else { + println("Visiting: " + n + " " + url); + get_all_URLs(get_page(url)) + } + } + } + bf_crawl2(new_todo, visited union todo, n - 1) + } +} + +bf_crawl2(Set(startURL), Set(), 5) + +// email harvester +// from +// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ + +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + 
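// a quick sanity check of the email regex (example addresses are made up for illustration): +println(email_pattern.findAllIn("contact foo.bar@example.co.uk or baz@kcl.ac.uk").mkString("\n")) +// should print foo.bar@example.co.uk and baz@kcl.ac.uk + 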
+def bf_crawl3(todo: Set[String], visited: Set[String], n: Int) : Unit = { + if (n == 0) () + else { + val new_todo = todo.flatMap { + url => { + if (visited.contains(url)) Set[String]() + else { + println("Visiting: " + n + " " + url); + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + get_all_URLs(get_page(url)) + } + } + } + bf_crawl3(new_todo, visited union todo, n - 1) + } +} + +bf_crawl3(Set(startURL), Set(), 3) + + +// depth-first version does not work, +// because it might visit pages at depth 1 +// while it still wants to visit them at +// depth 2 +var visited = Set("") + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (visited.contains(url)) () //println("Already visited: " + n + " " + url) + else { + println("Visiting: " + n + " " + url); + visited += url + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1); + } +} diff -r 47f86885d481 -r e85600529ca5 scala/crawler1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/crawler1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,45 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - searches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +// starting URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" +//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/""" + + +// call on the command line +crawl(startURL, 2) + +crawl("""http://www.dcs.kcl.ac.uk/staff/urbanc/msc-projects-12.html""", 2) diff -r 47f86885d481 -r e85600529ca5 scala/crawler2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/crawler2.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,44 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// starting URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - searches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + +// can now deal with depth 3 +// start on command line +crawl(startURL, 4) + diff -r 47f86885d481 -r e85600529ca5 scala/crawler3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/crawler3.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,49 @@ +import io.Source +import scala.util.matching.Regex + +// gets 
the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// starting URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - searches until a given depth, +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + //else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page)) crawl(u, n - 1) + } +} + +// can deal with depth 3; now also harvests email addresses +// start on command line +crawl(startURL, 3) + diff -r 47f86885d481 -r e85600529ca5 scala/html.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/html.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,99 @@ + +//:load matcher.scala + +// some regular expressions +val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""") +val WORD = PLUS(SYM) + +val BTAG = SEQS("<", WORD, ">") // begin tags, e.g. <b> +val ETAG = SEQS("</", WORD, ">") // end tags, e.g. </b> + +val WHITESPACE = PLUS(RANGE(" \n")) + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case class T_WORD(s: String) extends Token +case class T_ETAG(s: String) extends Token +case class T_BTAG(s: String) extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +val lexing_rules: List[Rule[Token]] = + List((BTAG, (s) => T_BTAG(s.mkString)), + (ETAG, (s) => T_ETAG(s.mkString)), + (WORD, (s) => T_WORD(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE)) + +// the tokenizer +val T = Tokenizer(lexing_rules) + +// width for printing +val WIDTH = 60 + + +def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match { + case Nil => println(Console.RESET) + case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr) + case T_WORD(s)::rest => { + val newstr = Console.RESET + ctr.reverse.mkString + s + if (c + s.length < WIDTH) { + print(newstr); + interpret(rest, c + s.length, ctr) + } + else { + print("\n" + newstr) + interpret(rest, s.length, ctr) + } + } + case T_BTAG("
<p>
")::rest => print("\n"); interpret(rest, 0, ctr) + case T_ETAG("
</p>
")::rest => print("\n"); interpret(rest, 0, ctr) + case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr) + case T_BTAG("<u>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) + case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr) + case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr) + case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr) + case T_ETAG(_)::rest => interpret(rest, c, ctr.tail) + case _::rest => interpret(rest, c, ctr) +} + +val test_string = """ +<b>MSc Projects</b> + +
<p>
+start of paragraph. a <cyan>cyan</cyan> word <red>normal</red> again something longer. +
</p>
+ + +
<p>
Description: + Regular expressions are extremely useful for many text-processing tasks such as finding patterns in texts, + lexing programs, syntax highlighting and so on. Given that regular expressions were + introduced in 1950 by Stephen Kleene, you might think + regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still + an active research area. For example + this paper + about regular expression matching and partial derivatives was presented this summer at the international + PPDP'12 conference. The task in this project is to implement the results from this paper.
</p>
+ +
<p>
The background for this project is that some regular expressions are + evil + and can stab you in the back; according to + this blog post. + For example, if you use in Python or + in Ruby (probably also in other mainstream programming languages) the + innocently looking regular expression a?{28}a{28} and match it, say, against the string + aaaaaaaaaaaaaaaaaaaaaaaaaaaa (that is 28 as), you will soon notice that your CPU usage goes to 100%. In fact, + Python and Ruby need approximately 30 seconds of hard work for matching this string. You can try it for yourself: + re.py (Python version) and + re.rb + (Ruby version). You can imagine an attacker + mounting a nice DoS attack against + your program if it contains such an evil regular expression. Actually + Scala (and also Java) are almost immune from such + attacks as they can deal with strings of up to 4,300 as in less than a second. But if you scale + the regular expression and string further to, say, 4,600 as, then you get a + StackOverflowError + potentially crashing your program. +
</p>
+""" + +interpret(T.fromString(test_string), 0, Nil) diff -r 47f86885d481 -r e85600529ca5 scala/html1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/html1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,75 @@ + + +//:load matcher.scala + + +// some regular expressions +val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""") +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER))) +val BTAG = SEQS("<", NAME, ">") +val ETAG = SEQS("</", NAME, ">") + +val WORD = PLUS(ALT(LETTER, DIGIT)) +val WHITESPACE = PLUS(RANGE(" \n")) + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case class T_WORD(s: String) extends Token +case class T_ETAG(s: String) extends Token +case class T_BTAG(s: String) extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = + tokenize(rs, s.toList) + + + +// lexing rules for (simple) HTML +val lexing_rules: List[Rule[Token]] = + List((BTAG, (s) => T_BTAG(s.mkString)), + (ETAG, (s) => T_ETAG(s.mkString)), + (WORD, (s) => T_WORD(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE)) + +val ts = tokenize_file(lexing_rules, "test.html") + + +val WIDTH = 60 + +def is_tag(t: Token) = t match { + case T_BTAG(_) => true + case T_ETAG(_) => true + case _ => false +} + +def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match { + case Nil => println(Console.RESET) + case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr) + case T_WORD(s)::rest => { + val newc = c + s.length + val newstr = Console.RESET + ctr.reverse.mkString + s + if (newc < WIDTH) { + print(newstr); + interpret(rest, newc, ctr) + } + else { + print("\n" + newstr) + interpret(rest, s.length, ctr) + } + } + case T_BTAG("
<p>
")::rest => print("\n"); interpret(rest, 0, ctr) + case T_ETAG("
</p>
")::rest => print("\n"); interpret(rest, 0, ctr) + case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr) + case T_BTAG("<u>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr) + case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr) + case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr) + case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr) + case T_ETAG(_)::rest => interpret(rest, c, ctr.tail) + case _::rest => interpret(rest, c, ctr) +} + +interpret(ts, 0, Nil) diff -r 47f86885d481 -r e85600529ca5 scala/i.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/i.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,492 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLC extends Rexp // recognises any character +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp // negation of a regular expression + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t.
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLC => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// regular expression for specifying +// ranges of characters +def Range(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// many alternatives +def Alts(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => ALT(r, Alts(rs)) +} +def ALTS(rs: Rexp*) = Alts(rs.toList) + +// repetitions +def Seqs(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => SEQ(r, Seqs(rs)) +} +def SEQS(rs: Rexp*) = Seqs(rs.toList) + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +type Rule[T] = (Rexp, List[Char] => T) + +case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) { + + def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + + def one_token(s: List[Char]) : Either[(List[Char], T), String] = { + val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten + if (somes == Nil) Right(s.mkString) + else Left(somes sortBy (_._1.length) head) + } + + def tokenize(cs: List[Char]) : List[T] = cs match { + case Nil => Nil + case _ => one_token(cs) match { + case Left((rest, token)) => token :: tokenize(rest) + case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } + } + } + + def fromString(s: String) : List[T] = + tokenize(s.toList).filterNot(excl.contains(_)) + + def fromFile(name: String) : List[T] = + fromString(io.Source.fromFile(name).mkString) + +} + + +// parser combinators with input type I and return type T + +abstract class Parser[I <% Seq[_], T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head + + def parse_single(ts: I) : T = parse_all(ts).toList match { + case t::Nil => t + case _ => { println ("Parse Error") ; sys.exit(-1) } + } + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, 
T]) extends Parser[I, T] { + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +// A parser and evaluator for the While language +// +//:load matcher.scala +//:load parser3.scala + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[Rule[Token]] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Relop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN)
|| + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// compiler +val beginning = """ +.class public XXX.XXX +.super java/lang/Object + +.method public ()V + aload_0 + invokenonvirtual java/lang/Object/()V + return +.end method + +.method public static write(I)V + .limit locals 5 + .limit stack 5 + iload 0 + getstatic java/lang/System/out Ljava/io/PrintStream; + swap + invokevirtual java/io/PrintStream/println(I)V + return +.end method + + +.method public static main([Ljava/lang/String;)V + .limit locals 200 + .limit stack 200 + +""" + +val ending = """ + + return + +.end method +""" + +// for generating new labels +var counter = -1 + +def Fresh(x: String) = { + counter += 1 + x ++ "_" ++ counter.toString() +} + +type Env = Map[String, String] +type Instrs = List[String] + +def compile_aexp(a: AExp, env : Env) : Instrs = a match { + case Num(i) => List("ldc " + i.toString + "\n") + case Var(s) => List("iload " + env(s) + "\n") + case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") + case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") + case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") +} + +def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { + case True => Nil + case False => List("goto " + jmp + "\n") + case Relop("=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") + case Relop("!=", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") + case Relop("<", a1, a2) => + compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") +} + + +def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { + case Skip => (Nil, env) + case Assign(x, a) => { + val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString + (compile_aexp(a, env) ++ + List("istore " + index + "\n"), env + (x -> index)) + } + case If(b, bl1, bl2) => { + val if_else = Fresh("If_else") + val if_end = Fresh("If_end") + val (instrs1, env1) = compile_bl(bl1, env) + val (instrs2, env2) = compile_bl(bl2, env1) + (compile_bexp(b, env, if_else) ++ + instrs1 ++ + List("goto " + if_end + "\n") ++ + List("\n" + if_else + ":\n\n") ++ + instrs2 ++ + 
List("\n" + if_end + ":\n\n"), env2) + } + case While(b, bl) => { + val loop_begin = Fresh("Loop_begin") + val loop_end = Fresh("Loop_end") + val (instrs1, env1) = compile_bl(bl, env) + (List("\n" + loop_begin + ":\n\n") ++ + compile_bexp(b, env, loop_end) ++ + instrs1 ++ + List("goto " + loop_begin + "\n") ++ + List("\n" + loop_end + ":\n\n"), env1) + } + case Write(x) => + (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) +} + +def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { + case Nil => (Nil, env) + case s::bl => { + val (instrs1, env1) = compile_stmt(s, env) + val (instrs2, env2) = compile_bl(bl, env1) + (instrs1 ++ instrs2, env2) + } +} + +def compile(input: String) : String = { + val class_name = input.split('.')(0) + val tks = Tok.fromFile(input) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) +} + + +def compile_to(input: String, output: String) = { + val fw = new java.io.FileWriter(output) + fw.write(compile(input)) + fw.close() +} + +// +val tks = Tok.fromString("x := x + 1") +val ast = Stmt.parse_single(tks) +println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) + + + +//examples + +compile_to("loops.while", "loops.j") +//compile_to("fib.while", "fib.j") + + +// testing cases for time measurements +/* +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +// for testing +import scala.sys.process._ + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +}; +write x; +write y; +write z +""" + + +def compile_test(n: Int) : Unit = { + val class_name = "LOOP" + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + val instructions = compile_bl(ast, Map.empty)._1 + val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) + val fw = new java.io.FileWriter(class_name + ".j") + fw.write(assembly) + fw.close() + val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! 
+ println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) +} + +List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) + + + +// Java bytecode assembler +// +// java -jar jvm/jasmin-2.4/jasmin.jar loops.j + +*/ + + + + + diff -r 47f86885d481 -r e85600529ca5 scala/matcher.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/matcher.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,130 @@ +package object matcher { + +// regular expressions +// including constructors for NOT and ALLC +sealed abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLC extends Rexp // recognises any character +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp // negation of a regular expression + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case ALLC => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t.
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLC => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// main class for the tokenizer +case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) { + +def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + +def one_token(s: List[Char]) : Either[(List[Char], T), String] = { + val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten + if (somes == Nil) Right(s.mkString) + else Left(somes sortBy (_._1.length) head) +} + +def tokenize(cs: List[Char]) : List[T] = cs match { + case Nil => Nil + case _ => one_token(cs) match { + case Left((rest, token)) => token :: tokenize(rest) + case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } + } +} + +def fromString(s: String) : List[T] = + tokenize(s.toList).filterNot(excl.contains(_)) + +def fromFile(name: String) : List[T] = + fromString(io.Source.fromFile(name).mkString) + +} + + +// regular expression for specifying +// ranges of characters +def Range(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), Range(s)) +} +def RANGE(s: String) = Range(s.toList) + + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// many alternatives +def Alts(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => ALT(r, Alts(rs)) +} +def ALTS(rs: Rexp*) = Alts(rs.toList) + +// repetitions +def Seqs(rs: List[Rexp]) : Rexp = rs match { + case Nil => NULL + case r::Nil => r + case r::rs => SEQ(r, Seqs(rs)) +} +def SEQS(rs: Rexp*) = Seqs(rs.toList) + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + +} diff -r 47f86885d481 -r e85600529ca5 scala/mllex.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/mllex.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,109 @@ +//:load matcher.scala + + +// some regular expressions +val KEYWORDS = ALTS("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", + "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as", + "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn", + "fun", "functor", "handle", "if", "in", "include", "infix", "infixr", + "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec", + "sharing", "sig", "signature", "struct", "structure", "then", "type", + "val", "where", "while", "with", "withtype") + +val DIGITS = RANGE("0123456789") +val NONZERODIGITS = RANGE("123456789") + +val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES) + +val ALL = ALTS(KEYWORDS, INTEGERS) + +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALL), "*/", STAR(ALL))), "*/") + + + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class T_NT(s: String, rhs: List[Token]) extends Token + +def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] = + tokenize(rs, s.toList).filterNot(_ match { + case T_WHITESPACE => true + case _ => false + }) + + + +// lexing rules for the ML fragment +val lexing_rules: List[Rule[Token]] = + List((INTEGERS, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +tokenize_file(Nil, "nominal_library.ML") + + + + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("E" -> List(T_NUM), + "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)), + "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)), + "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)), + "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN)) + +def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match { + case (_, Nil) => true + case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2) + case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2) + case _ => false +} + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match { + case List(T_NT("E", tree)) => { println(tree); true } + case _ => { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs))) + tss.flatten.exists(parse1(g, _)) + } +} + + +println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1")) +println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)")) +println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)")) + + + diff -r 47f86885d481 -r e85600529ca5 scala/parser1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/parser1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,88 @@ +// A naive bottom-up parser with backtracking +// +// Needs: +// :load matcher.scala + +// some regular expressions +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised + +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class NT(s: String) extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]] = + List((NUMBER, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) =>
T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("F" -> List(T_NUM), + "E" -> List(T_NUM), + "E" -> List(NT("E"), T_OP("+"), NT("E")), + "E" -> List(NT("E"), T_OP("-"), NT("E")), + "E" -> List(NT("E"), T_OP("*"), NT("E")), + "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) + + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples for chop +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse(g: Grammar, ts: List[Token]) : Boolean = { + println(ts) + if (ts == List(NT("E"))) true + else { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) + tss.flatten.exists(parse(g, _)) + } +} + +def parser(g: Grammar, s: String) = { + println("\n") + parse(g, Tok.fromString(s)) +} + + + +parser(grammar, "2 + 3 * 4 + 1") +parser(grammar, "(2 + 3) * (4 + 1)") +parser(grammar, "(2 + 3) * 4 (4 + 1)") + + + diff -r 47f86885d481 -r e85600529ca5 scala/parser2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/parser2.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,139 @@ +// A naive version of parser combinators producing parse trees +// +// Needs +// :load matcher.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_IF extends Token +case object T_THEN extends Token +case object T_ELSE extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List(("if", (s) => T_IF), + ("then", (s) => T_THEN), + ("else", (s) => T_ELSE), + (NUMBER, (s) => T_NUM(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +// parse trees +abstract class ParseTree +case class Leaf(t: Token) extends ParseTree +case class Branch(pts: List[ParseTree]) extends ParseTree + +def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match { + case Leaf(t) => Branch(List(Leaf(t), pt2)) + case Branch(pts) => Branch(pts ++ List(pt2)) +} + +// parser combinators +abstract class Parser { + def parse(ts: List[Token]): Set[(ParseTree, List[Token])] + + def parse_all(ts: List[Token]) : Set[ParseTree] = + for ((head, tail) <- parse(ts); if (tail == Nil)) yield head + + def || (right : => Parser) : Parser = new AltParser(this, right) + def ~ (right : => Parser) : Parser = new 
SeqParser(this, right) +} + +class AltParser(p: => Parser, q: => Parser) extends Parser { + def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts) +} + +class SeqParser(p: => Parser, q: => Parser) extends Parser { + def parse(ts: List[Token]) = + for ((head1, tail1) <- p.parse(ts); + (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2) +} + +class ListParser(ps: => List[Parser]) extends Parser { + def parse(ts: List[Token]) = ps match { + case Nil => Set() + case p::Nil => p.parse(ts) + case p::ps => + for ((head1, tail1) <- p.parse(ts); + (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2) + } +} + +case class TokParser(tok: Token) extends Parser { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((Leaf(t), ts)) + case _ => Set () + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +case object IdParser extends Parser { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts)) + case _ => Set () + } +} + +case object NumParser extends Parser { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts)) + case _ => Set () + } +} + +lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol +lazy val T: Parser = (F ~ T_OP("*") ~ T) || F +lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser + +println(Tok.fromString("1 + 2 + 3")) +println(E.parse_all(Tok.fromString("1 + 2 + 3"))) + +def eval(t: ParseTree) : Int = t match { + case Leaf(T_NUM(n)) => n.toInt + case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2) + case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2) + case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t) +} + +(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_)) +(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_)) + +lazy val EXPR: Parser = + new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || + new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || + IdParser + +println(EXPR.parse_all(Tok.fromString("if a then b else c"))) +println(EXPR.parse_all(Tok.fromString("if a then if x then y else c"))) + + + + diff -r 47f86885d481 -r e85600529ca5 scala/parser2a.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/parser2a.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,105 @@ +// Parser combinators including semantic actions +// parses lists of tokens +// +// Needs +// :load matcher.scala + +// some regular expressions +val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz") +val ID = PLUS(LETTER) + +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") + +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') + +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case class T_NUM(s: String) extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_IF extends Token +case object T_THEN extends Token +case object T_ELSE extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List(("if", (s) => T_IF), + ("then", (s) => T_THEN), + ("else", (s) => T_ELSE), + (NUMBER, (s) => T_NUM(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => 
T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +// parser combinators with return type T +abstract class Parser[T] { + def parse(ts: List[Token]): Set[(T, List[Token])] + + def parse_all(ts: List[Token]) : Set[T] = + for ((head, tail) <- parse(ts); if (tail == Nil)) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) + def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) + +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: List[Token]) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse (sb: List[Token]) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +case class TokParser(tok: Token) extends Parser[Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T +lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F +lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser + +println(E.parse_all(Tok.fromString("1 + 2 + 3"))) +println(E.parse_all(Tok.fromString("1 + 2 * 3"))) +println(E.parse_all(Tok.fromString("(1 + 2) * 3"))) + +// Exercise: implement minus +println(E.parse_all(Tok.fromString("(1 - 2) * 3"))) +println(E.parse_all(Tok.fromString("(1 + 2) * - 3"))) diff -r 47f86885d481 -r e85600529ca5 scala/parser3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/parser3.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,41 @@ +package object parser { + +// parser combinators +// with input type I and return type T +// +// needs to be compiled with scalac parser3.scala + +abstract class Parser[I <% Seq[_], T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head + + def parse_single(ts: I) : T = parse_all(ts).toList match { + case t::Nil => t + case _ => { println ("Parse Error") ; sys.exit(-1) } + } + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + +class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: I) =
p.parse(sb) ++ q.parse(sb) +} + +class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + +} diff -r 47f86885d481 -r e85600529ca5 scala/parser4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/parser4.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,82 @@ + +// parser combinators with input type I and return type T + +case class SubString(s: String, l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def substring(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubString(s, low, high) + +} + +type Ctxt = List[(String, SubString)] + +abstract class Parser[T] { + + def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)] + + def parse_all(s: String) : Set[T] = + for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head + + def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) + def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) +} + +class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] { + def parse(sb: SubString, ctxt: Ctxt) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case class SubStringParser(s: String) extends Parser[SubString] { + val n = s.length + def parse(sb: SubString, ctxt: Ctxt) = { + if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s) + Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n))) + else Set() + } +} + +implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring()) + +class IgnLst[T](p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] { + def parse(sb: SubString, ctxt: Ctxt) = { + val should_trim = ctxt.contains((nt, sb)) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +// an ambiguous, left-recursive grammar +lazy val E: Parser[Int] = + new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} || + ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} || + "0" ==> { (s) => 0 } || + "1" ==> { (s) => 1 } || + "2" ==> { (s) => 2 } || + "3" ==> { (s) => 3 }) + +println(E.parse_all("1+2*3+3")) + + diff -r 47f86885d481 -r e85600529ca5 scala/parser5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/parser5.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,113 @@ +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE
extends Token +case class T_NUM(s: String) extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token + +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM(s.mkString)), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) + + +// parser combinators with input type I and return type T +// and memoisation + +case class SubList[T](s: List[T], l: Int, h: Int) { + def low = l + def high = h + def length = h - l + def sublist(l: Int = l, h: Int = h) = s.slice(l, h) + def set(low: Int = l, high: Int = h) = SubList(s, low, high) +} + +type Ctxt[T] = List[(String, SubList[T])] + +abstract class Parser[I, T] { + + def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] + + def parse_all(s: List[I]) : Set[T] = + for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head + + def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) + def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) + def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) + def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) + def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) +} + +class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = + for ((head1, tail1) <- p.parse(sb, ctxt); + (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) +} + +class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) +} + +class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = + for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) +} + +case object NumParser extends Parser[Token, Int] { + def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { + if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { + case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) + case _ => Set() + } + else Set() + } +} + +case class TokParser(t: Token) extends Parser[Token, Token] { + def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = { + if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) + else Set() + } +} + +implicit def token2tparser(t: Token) = TokParser(t) + +class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = { + if (sb.length == 0) Set() + else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) + yield (head, tail.set(high = tail.high + 1)) + } +} + +class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] { + def parse(sb: SubList[I], ctxt: Ctxt[I]) = { + val should_trim = ctxt.contains (nt, sb) + if (should_trim && sb.length == 0) Set() + else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) + else p.parse(sb, (nt, sb)::ctxt) + } +} + +lazy val E: Parser[Token, Int] = + new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || + (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} || + (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} || + NumParser) + +println(E.parse_all(Tk.fromString("1 + 2 * 3"))) +println(E.parse_all(Tk.fromString("(1 + 2) * 3"))) 
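A small standalone sketch may help at this point: matcher.scala and i.scala above, and the re*.scala files below, all share the same matching loop, namely that a regular expression r matches a string s exactly when folding der over the characters of s leaves a nullable regular expression. The sketch assumes matcher.scala from this patch has been compiled (its package object provides Rexp, CHAR, SEQ, STAR, der and nullable); the names matches and ABSTAR are ad-hoc and appear nowhere in the patch.

import matcher._

// matching = fold der over the input, then test for nullability
def matches(r: Rexp, s: String) : Boolean =
  nullable(s.toList.foldLeft(r)((r1, c) => der(c, r1)))

// (ab)* accepts "abab" but rejects "aba"
val ABSTAR = STAR(SEQ(CHAR('a'), CHAR('b')))
assert(matches(ABSTAR, "abab"))
assert(!(matches(ABSTAR, "aba")))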
diff -r 47f86885d481 -r e85600529ca5 scala/re-alt.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re-alt.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,115 @@ +trait RegExp { + def nullable: Boolean + def derive(c: Char): RegExp +} + +case object Empty extends RegExp { + def nullable = false + def derive(c: Char) = Empty +} + +case object Eps extends RegExp { + def nullable = true + def derive(c: Char) = Empty +} + +case class Str(s: String) extends RegExp { + def nullable = s.isEmpty + def derive(c: Char) = + if (s.isEmpty || s.head != c) Empty + else Str(s.tail) +} + +case class Cat(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable && s.nullable + def derive(c: Char) = + if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c)) + else Cat(r.derive(c), s) +} + +case class Star(r: RegExp) extends RegExp { + def nullable = true + def derive(c: Char) = Cat(r.derive(c), this) +} + +case class Or(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable || s.nullable + def derive(c: Char) = Or(r.derive(c), s.derive(c)) +} + +case class And(r: RegExp, s: RegExp) extends RegExp { + def nullable = r.nullable && s.nullable + def derive(c: Char) = And(r.derive(c), s.derive(c)) +} + +case class Not(r: RegExp) extends RegExp { + def nullable = !r.nullable + def derive(c: Char) = Not(r.derive(c)) +} + + + + +object Matcher { + def matches(r: RegExp, s: String): Boolean = { + if (s.isEmpty) r.nullable + else matches(r.derive(s.head), s.tail) + } +} + + +object Pimps { + implicit def string2RegExp(s: String) = Str(s) + + implicit def regExpOps(r: RegExp) = new { + def | (s: RegExp) = Or(r, s) + def & (s: RegExp) = And(r, s) + def % = Star(r) + def %(n: Int) = rep(r, n) + def ? = Or(Eps, r) + def ! = Not(r) + def ++ (s: RegExp) = Cat(r, s) + def ~ (s: String) = Matcher.matches(r, s) + } + + implicit def stringOps(s: String) = new { + def | (r: RegExp) = Or(s, r) + def | (r: String) = Or(s, r) + def & (r: RegExp) = And(s, r) + def & (r: String) = And(s, r) + def % = Star(s) + def % (n: Int) = rep(Str(s), n) + def ? = Or(Eps, s) + def ! = Not(s) + def ++ (r: RegExp) = Cat(s, r) + def ++ (r: String) = Cat(s, r) + def ~ (t: String) = Matcher.matches(s, t) + } + + def rep(r: RegExp, n: Int): RegExp = + if (n <= 0) Star(r) + else Cat(r, rep(r, n - 1)) +} + + +object Test { + import Pimps._ + + val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + val int = ("+" | "-").? ++ digit.%(1) + val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).? 
+ + def main(args: Array[String]) { + val ints = List("0", "-4534", "+049", "99") + val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") + val errs = List("", "-", "+", "+-1", "-+2", "2-") + + ints.foreach(s => assert(int ~ s)) + reals.foreach(s => assert(!(int ~ s))) + errs.foreach(s => assert(!(int ~ s))) + + ints.foreach(s => assert(real ~ s)) + reals.foreach(s => assert(real ~ s)) + errs.foreach(s => assert(!(real ~ s))) + } +} diff -r 47f86885d481 -r e85600529ca5 scala/re-internal.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re-internal.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,17 @@ + +// measures the time a function needs +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 10001 by 300) { + val re = ("((a?){" + i + "})(a{" + i + "})") + println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re)))) +} + + + diff -r 47f86885d481 -r e85600529ca5 scala/re.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,123 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. 
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + s match { + case Nil if (nullable(r)) => Some(Nil, t) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) + } + +def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => one_string(regs, s) match { + case (rest, s) => s.mkString :: tokenize(regs, rest) + } +} + +//examples +println(tokenize(regs, "if true then then 42 else +".toList)) +println(tokenize(regs, "if+true+then+then+42+else +".toList)) +println(tokenize(regs, "ifff if 34 34".toList)) +println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(regs, "/* if true then */ then 42 else +".toList)) +//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r 47f86885d481 -r e85600529ca5 scala/re0.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re0.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,117 @@ +import scala.annotation.tailrec + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case object ALLCHAR extends Rexp +case class CHAR(c: Char) extends Rexp +case class STR(s: String) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp +case class REP(r: Rexp, n: Int) extends Rexp + +// some convenience for typing in regular expressions +implicit def string2rexp(s : String) : Rexp = STR(s) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def %(n: Int) = REP(r, n) + def %%(n: Int) = SEQ(REP(r, n), STAR(r)) + def ? = ALT(EMPTY, r) + def unary_! 
= NOT(r) + def ~ (s: Rexp) = SEQ(r, s) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def %(n: Int) = REP(s, n) + def %%(n: Int) = SEQ(REP(s, n), STAR(s)) + def ? = ALT(EMPTY, s) + def unary_! = NOT(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + +// nullable function: tests whether the regular +// expression can recognise the empty string + +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case ALLCHAR => false + case CHAR(_) => false + case STR(s) => s.isEmpty + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) + case REP(r, i) => if (i == 0) true else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case ALLCHAR => EMPTY + case CHAR(d) => if (c == d) EMPTY else NULL + case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail) + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) + case REP(r, i) => + if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1)) +} + +// derivative w.r.t. a string (iterates der) +@tailrec +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +//examples +val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +val int = ("+" | "-").? ~ digit.%%(1) +val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).? + +val ints = List("0", "-4534", "+049", "99") +val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01") +val errs = List("", "-", "+", "+-1", "-+2", "2-") + +ints.map(s => matcher(int, s)) +reals.map(s => matcher(int, s)) +errs.map(s => matcher(int, s)) + +ints.map(s => matcher(real, s)) +reals.map(s => matcher(real, s)) +errs.map(s => matcher(real, s)) + + + +def RTEST(n: Int) = ("a".? 
%(n)) ~ ("a" %(n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 12000 by 500) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/re1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,80 @@ + +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + +//example +//val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) +//der('b', r) +//der('b', r) + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => EMPTY + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 29) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/re2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re2.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,100 @@ + +abstract class Rexp { + def simp : Rexp = this +} + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, r) => r + case (r, NULL) => r + case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) + case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) + } +} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, _) => NULL + case (_, NULL) => NULL + case (EMPTY, r) => r + case (r, EMPTY) => r + case (r1, r2) => SEQ(r1, r2) + } +} +case class STAR(r: Rexp) extends Rexp { + 
override def simp = r.simp match { + case NULL => EMPTY + case EMPTY => EMPTY + case r => STAR(r) + } +} + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r).simp) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => EMPTY + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +} + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + +for (i <- 1 to 100) { + println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i)))) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/re3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re3.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,106 @@ + +abstract class Rexp { + def simp : Rexp = this +} + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, r) => r + case (r, NULL) => r + case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) + case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2) + } +} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, _) => NULL + case (_, NULL) => NULL + case (EMPTY, r) => r + case (r, EMPTY) => r + case (r1, r2) => SEQ(r1, r2) + } +} +case class STAR(r: Rexp) extends Rexp { + override def simp = r.simp match { + case NULL => EMPTY + case EMPTY => EMPTY + case r => STAR(r) + } +} +case class NTIMES(r: Rexp, n: Int) extends Rexp { + override def simp = if (n == 0) EMPTY else + r.simp match { + case NULL => NULL + case EMPTY => EMPTY + case r => NTIMES(r, n) + } +} + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => 
false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NTIMES(r, i) => + if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r).simp) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 11001 by 500) { + println(i + " " + + " " + time_needed(1, matcher(RTEST(i), "a" * i))) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/re4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/re4.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,101 @@ +import scala.annotation.tailrec +abstract class Rexp { + def simp : Rexp = this +} + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, r) => r + case (r, NULL) => r + case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY) + case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY) + case (r1, r2) => ALT(r1, r2) + } +} +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp { + override def simp = (r1.simp, r2.simp) match { + case (NULL, _) => NULL + case (_, NULL) => NULL + case (EMPTY, r) => r + case (r, EMPTY) => r + case (r1, r2) => SEQ(r1, r2) + } +} +case class STAR(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NTIMES(r, i) => if (i == 0) true else nullable(r) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NTIMES(r, i) => + if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1)) +} + +// derivative w.r.t.
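a string comes next. First, a hedged sketch (using only this file's definitions) of why the NTIMES case of nullable above must return true for i == 0: +// der('a', NTIMES(CHAR('a'), 2)) = SEQ(EMPTY, NTIMES(CHAR('a'), 1)), which simp +// reduces to NTIMES(CHAR('a'), 1); one further step leaves NTIMES(CHAR('a'), 0), +// and since a^0 matches exactly the empty string, it has to count as nullable -- +// otherwise matcher(RTEST(n), "a" * n) below would wrongly return false +//println(der('a', NTIMES(CHAR('a'), 2)).simp) + +// derivative w.r.t.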
a string (iterates der) +@tailrec +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r).simp) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + + +//one or zero +def OPT(r: Rexp) = ALT(r, EMPTY) + +//n-times +/*def NTIMES(r: Rexp, n: Int) : Rexp = n match { + case 0 => NULL + case 1 => r + case n => SEQ(r, NTIMES(r, n - 1)) +}*/ + +def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n)) + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +for (i <- 1 to 13001 by 500) { + println(i + " " + time_needed(1, matcher(RTEST(i), "a" * i))) +} + + diff -r 47f86885d481 -r e85600529ca5 scala/regexp.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/regexp.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,106 @@ +// regular expressions +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// for example +println(STAR("abc")) + +// produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY)))) + + + +// a simple-minded regular expression matcher: +// it loops for examples like STAR(EMPTY) with +// strings this regular expression does not match + +def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match { + case (NULL::rs, s) => false + case (EMPTY::rs, s) => smatchers(rs, s) + case (CHAR(c)::rs, Nil) => false + case (CHAR(c)::rs, d::s) => (c ==d) && smatchers(rs, s) + case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s) + case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s) + case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s) + case (Nil, s) => s == Nil +} + +def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList) + +// regular expression: a +println(smatcher(CHAR('a'), "ab")) + +// regular expression: a + (b o c) +println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab")) + +// regular expression: a + (b o c) +println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc")) + +// loops for regular expression epsilon* +//println(smatcher(STAR(EMPTY), "a")) + + + +// Regular expression matcher that works properly +//================================================ + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) +} + +// derivative w.r.t. 
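a string comes next. As a hedged aside (using only this file's definitions), note why this derivative-based matcher terminates where smatcher above loops: +// der('a', STAR(EMPTY)) = SEQ(der('a', EMPTY), STAR(EMPTY)) = SEQ(NULL, STAR(EMPTY)), +// which is not nullable, so matcher(STAR(EMPTY), "a") below returns false instead of looping +//println(der('a', STAR(EMPTY))) + +// derivative w.r.t.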
a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +//examples + +println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) +println(matcher(ALT(STAR("a"), STAR("b")), "")) +println(matcher("abc", "")) +println(matcher(STAR(ALT(EMPTY, "a")), "")) +println(matcher(STAR(EMPTY), "a")) +println(matcher("cab","cab")) +println(matcher(STAR("a"),"aaa")) +println(matcher("cab" ,"cab")) +println(matcher(STAR("a"),"aaa")) + + diff -r 47f86885d481 -r e85600529ca5 scala/regexp2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/regexp2.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,123 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. 
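a character comes next. First, a hedged sketch (only this file's definitions) of how no_more above drives the maximal-munch lexer further below: +// a token candidate is abandoned as soon as it gets stuck, e.g. +// no_more(der('x', CHAR('a'))) = no_more(NULL) = true, which is the signal for +// munch below to stop extending the current match +//println(no_more(NULL)) + +// derivative of a regular expression w.r.t.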
a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + s match { + case Nil if (nullable(r)) => Some(Nil, t) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) + } + +def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => one_string(regs, s) match { + case (rest, s) => s.mkString :: tokenize(regs, rest) + } +} + +//examples +println(tokenize(regs, "if true then then 42 else +".toList)) +println(tokenize(regs, "if+true+then+then+42+else +".toList)) +println(tokenize(regs, "ifff if 34 34".toList)) +println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(regs, "/* if true then */ then 42 else +".toList)) +//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r 47f86885d481 -r e85600529ca5 scala/regexp3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/regexp3.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,141 @@ + +// regular expressions including NOT +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL 
=> false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// cannot recognise more +def no_more (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => no_more(r1) && no_more(r2) + case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) + case STAR(_) => false + case NOT(r) => !(no_more(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +// one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + +// some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGIT = RANGE("0123456789".toList) +val NONZERODIGIT = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT))) +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val WHITESPACE = RANGE(" \n".toList) +val WHITESPACES = PLUS(WHITESPACE) + +val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE) +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + + +// for classifying the strings that have been recognised +abstract class Token + +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case class T_IDENT(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(n: Int) extends Token +case class T_KEYWORD(s: String) extends Token + + +// an example list of syntactic rules +type Rule = (Rexp, List[Char] => Token) + +val rules: List[Rule]= + List(("if", (s) => T_KEYWORD(s.mkString)), + ("then", (s) => T_KEYWORD(s.mkString)), + ("else", (s) => T_KEYWORD(s.mkString)), + ("+", (s) => T_OP(s.mkString)), + (IDENT, (s) => T_IDENT(s.mkString)), + (NUMBER, (s) => T_NUM(s.mkString.toInt)), + (WHITESPACES, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + + +def error (s: String) = throw new IllegalArgumentException ("Cannot tokenize: " + s) + +def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => None + case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (no_more(der (c, r))) => None + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } + +def one_token (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { + val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def tokenize (rs: List[Rule], s: List[Char]) : List[Token] = s match { + case Nil => Nil + case _ => one_token(rs, s) match { + case (rest, token) => token :: tokenize(rs, rest) + } +} + +//examples +println(tokenize(rules, "if true then then 42
else +".toList)) +println(tokenize(rules, "if+true+then+then+42+else +".toList)) +println(tokenize(rules, "ifff if 34 34".toList)) +println(tokenize(rules, "/*ifff if */ hhjj /*34 */".toList)) +println(tokenize(rules, "/* if true then */ then 42 else +".toList)) +//println(tokenize(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $ diff -r 47f86885d481 -r e85600529ca5 scala/regexp4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/regexp4.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,168 @@ +// regular expressions +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// recognises nothing +def zeroable (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => zeroable(r1) && zeroable(r2) + case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) + case STAR(_) => false + case NOT(r) => !(zeroable(r)) +} + +def starts_with (r: Rexp, c: Char) : Boolean = r match { + case NULL => false + case EMPTY => false + case CHAR(d) => (c == d) + case ALT(r1, r2) => starts_with(r1, c) || starts_with(r2, c) + case SEQ(r1, r2) => if (nullable(r1)) (starts_with(r1, c) || starts_with(r2, c)) + else starts_with(r1, c) + case STAR(r) => starts_with(r, c) + case NOT(r) => !(starts_with(r, c)) +} + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// derivative w.r.t. 
a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGITS = RANGE("0123456789".toList) +val NONZERODIGITS = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) +val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val WHITESPACE = RANGE(" \n".toList) +val SYMBOLS = RANGE("/*".toList) + +val ALL = ALT(ALT(ALT(LETTER, DIGITS), WHITESPACE), SYMBOLS) + +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + +println(matcher(NUMBER, "0")) +println(matcher(NUMBER, "01")) +println(matcher(NUMBER, "123450")) + +println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) +println(matcher(ALT(STAR("a"), STAR("b")), "")) +println(matcher("abc", "")) +println(matcher(STAR(ALT(EMPTY, "a")), "")) +println(matcher(STAR(EMPTY), "a")) +println(matcher("cab","cab")) +println(matcher(STAR("a"),"aaa")) +println(matcher("cab" ,"cab")) +println(matcher(STAR("a"),"aaa")) + +println(matcher(COMMENT, "/* */")) +println(matcher(COMMENT, "/* foobar comment */")) +println(matcher(COMMENT, "/* test */ test */")) + +// an example list of regular expressions +val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, COMMENT, WHITESPACE) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] = + if (zeroable(r)) None else s match { + case Nil => if (nullable(r)) Some(Nil, t) else None + case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, t) + //case c::s if (zeroable(der (c, r))) => None + case c::s => munch(der (c, r), s, t ::: List(c)) +} + + +def lex_one (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = { + val somes = regs.map { munch(_, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def lex_all (regs: List[Rexp], s: List[Char]) : List[String] = s match { + case Nil => Nil + case _ => lex_one(regs, s) match { + case (rest, s) => s.mkString :: lex_all(regs, rest) + } +} + + + +starts_with(der('/', COMMENT), '*') + +munch(COMMENT, "/*ifff if 34 */".toList, Nil) +val COMMENT2 = NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL))) + +der('a', COMMENT2) +zeroable(der('a', COMMENT2)) + +matcher(COMMENT2, "ifff if 34") +munch(COMMENT2, "ifff if 34".toList, Nil) +starts_with(COMMENT2, 'i') +lex_all(regs, "ifff if 34".toList) +lex_all(regs, "ifff $ if 34".toList) + diff -r 47f86885d481 -r e85600529ca5 scala/regexp5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/regexp5.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,177 @@ +// regular expressions +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp +case class NOT(r: Rexp) extends Rexp + + +// 
some convenience for typing in regular expressions +def charlist2rexp(s : List[Char]) : Rexp = s match { + case Nil => EMPTY + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) + + +// nullable function: tests whether the regular +// expression can recognise the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + case NOT(r) => !(nullable(r)) +} + +// tests whether a regular expression +// recognises nothing +def zeroable (r: Rexp) : Boolean = r match { + case NULL => true + case EMPTY => false + case CHAR(_) => false + case ALT(r1, r2) => zeroable(r1) && zeroable(r2) + case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1) + //zeroable(r1) || zeroable(r2) + case STAR(_) => false + case NOT(r) => !(zeroable(r)) +} + + +// derivative of a regular expression w.r.t. a character +def der (c: Char, r: Rexp) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r) => SEQ(der(c, r), STAR(r)) + case NOT(r) => NOT(der (c, r)) +} + +// derivative w.r.t. a string (iterates der) +def ders (s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, der(c, r)) +} + +// main matcher function +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + +// regular expression for specifying +// ranges of characters +def RANGE(s : List[Char]) : Rexp = s match { + case Nil => NULL + case c::Nil => CHAR(c) + case c::s => ALT(CHAR(c), RANGE(s)) +} + +//one or more +def PLUS(r: Rexp) = SEQ(r, STAR(r)) + + +//some regular expressions +val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList) +val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList) +val LETTER = ALT(LOWERCASE, UPPERCASE) +val DIGITS = RANGE("0123456789".toList) +val NONZERODIGITS = RANGE("123456789".toList) + +val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) +val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") +val WHITESPACE = RANGE(" \n".toList) + +val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE) + +val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") + +println(matcher(NUMBER, "0")) +println(matcher(NUMBER, "01")) +println(matcher(NUMBER, "123450")) + +println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa")) +println(matcher(ALT(STAR("a"), STAR("b")), "")) +println(matcher("abc", "")) +println(matcher(STAR(ALT(EMPTY, "a")), "")) +println(matcher(STAR(EMPTY), "a")) +println(matcher("cab","cab")) +println(matcher(STAR("a"),"aaa")) +println(matcher("cab" ,"cab")) +println(matcher(STAR("a"),"aaa")) + +println(matcher(COMMENT, "/* */")) +println(matcher(COMMENT, "/* 34 */")) +println(matcher(COMMENT, "/* foobar comment */")) +println(matcher(COMMENT, "/* test */ test */")) + +// an example list of regular expressions + +abstract class Token + +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case class T_IDENT(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(n: Int) extends Token +case class T_KEYWORD(s: String) extends Token + +val regs: List[Rexp]= List("if", "then", "else", "+", 
IDENT, NUMBER, WHITESPACE) + +type Rule = (Rexp, List[Char] => Token) + +val rules: List[Rule]= + List(("if", (s) => T_KEYWORD(s.mkString)), + ("then", (s) => T_KEYWORD(s.mkString)), + ("else", (s) => T_KEYWORD(s.mkString)), + ("+", (s) => T_OP(s.mkString)), + (IDENT, (s) => T_IDENT(s.mkString)), + (NUMBER, (s) => T_NUM(s.mkString.toInt)), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + + +def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) + +def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = +{ println("string " + s) + println(" rexp " + r) + s match { + case Nil if (nullable(r)) => Some(Nil, action(t)) + case Nil => { println("1"); None } + case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, action(t)) + case c::s if (zeroable(der (c, r))) => { println("2"); None } + case c::s => munch(der (c, r), action, s, t ::: List(c)) + } +} + +def lex_one (rs: List[Rule], s: List[Char]) : (List[Char], Token) = { + val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten + if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head) +} + +def lex_all (rs: List[Rule], s: List[Char]) : List[Token] = s match { + case Nil => Nil + case _ => lex_one(rs, s) match { + case (rest, t) => t :: lex_all(rs, rest) + } +} + + + +println(matcher(COMMENT, "/*ifff if 34 34*/")) +rules.map { (r) => munch(r._1, r._2, "/*ifff if 34 34*/ ".toList, Nil) } +println(lex_all(rules, "ifff if 34 34".toList)) +println(lex_all(rules, " /*ifff if 34 34*/ ".toList)) +println(lex_all(rules, "ifff $ if 34".toList)) + + diff -r 47f86885d481 -r e85600529ca5 scala/scraper.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/scraper.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,57 @@ +import java.io.OutputStreamWriter +import java.net.URL +import scala.io.Source.fromInputStream + +val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp") + +//connect to url +val conn = url.openConnection +conn.setRequestProperty("User-Agent", "") +conn.setDoOutput(true) +conn.connect + +//sending data +val wr = new OutputStreamWriter(conn.getOutputStream()) +//wr.write("Fdate=2012-9-24&Tdate=2012-09-25") +//wr.write("Fdate=2012-9-18&Tdate=2012-09-25") +wr.write("Fdate=2001-5-18&Tdate=2012-09-25") +wr.flush +wr.close + +//receiving data +val page = fromInputStream(conn.getInputStream).getLines.mkString("\n") + +//println(page) + +// regular expression . 
excludes newlines, +// therefore we have to use [\S\s] +// (assumed reconstruction: the literal <tr>/<td> tags in the two patterns +// below were stripped from this copy of the file) +val regex1 = """<tr>[\S\s]*?</tr>""".r +val rows = regex1.findAllIn(page).toList + +//print(rows) + +val regex2 = """<td>([\S\s]*?)</td>""".r + +def aux(s: String) : Array[String] = { + for (m <- regex2.findAllIn(s).toArray) yield m match { + case regex2(value) => value.trim + } +} + +val data = rows.map { aux } + +def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt + +//day with highest particle pollution (PM_10) +data.sortWith(compare(1)).last + +//day with highest sulfur dioxide (SO_2) +data.sortWith(compare(2)).last + +//day with highest nitrogen dioxide (NO_2) +data.sortWith(compare(3)).last + +//days with highest PM_10 +val groups = data.groupBy(_(1).toInt) +val max_key = groups.keySet.max +groups(max_key) diff -r 47f86885d481 -r e85600529ca5 scala/while.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/while.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,231 @@ +// A parser and evaluator for the WHILE language +// +import matcher._ +import parser._ + + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set () + } +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts:
List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } || + (T_KWD("true") ==> ((_) => True)) || + (T_KWD("false") ==> ((_) => False: BExp)) + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + + +// examples +val p1 = "x := 5" +val p1_toks = Tok.fromString(p1) +val p1_ast = Block.parse_all(p1_toks) +println(p1_toks) +println(p1_ast) + +val p1a = "{ x := 5; y := 8}" +val p1a_toks = Tok.fromString(p1a) +val p1a_ast = Block.parse_all(p1a_toks) +println(p1a_ast) + +val p2 = "5 = 6" +val p2_toks = Tok.fromString(p2) +val p2_ast = BExp.parse_all(p2_toks) +println(p2_ast) + +val p2a = "true" +val p2a_toks = Tok.fromString(p2a) +val p2a_ast = BExp.parse_all(p2a_toks) +println(p2a_ast) + +val p3 = "if true then skip else skip" +val p3_toks = Tok.fromString(p3) +val p3_ast = Stmt.parse_all(p3_toks) +println(p3_ast) + +val p3a = "if true then x := 5 else x := 10" +val p3a_toks = Tok.fromString(p3a) +val p3a_ast = Stmt.parse_all(p3a_toks) +println(p3a_ast) + +val p3b = "if false then x := 5 else x := 10" +val p3b_toks = Tok.fromString(p3b) +val p3b_ast = Stmt.parse_all(p3b_toks) +println(p3b_ast) + +// multiplication +val p4 = """{ x := 5; + y := 4; + r := 0; + while y > 0 do { + r := r + x; + y := y - 1 + } + }""" +val p4_toks = Tok.fromString(p4) +val p4_ast = Block.parse_all(p4_toks) +println(p4_ast) + +val p5 = """ + n := 9; + minus1 := 0; + minus2 := 1; + temp := 0; + while n > 0 do { + temp := minus2; + minus2 := minus1 + minus2; + minus1 := temp; + n := n - 1 + }; + fib_res := minus2 +""" +val p5_toks = Tok.fromString(p5) +val p5_ast = Stmts.parse_all(p5_toks) + +// interpreter +type Env = Map[String, Int] + +def eval_bexp(b: BExp, env: Env) : Boolean = b match { + case True => true + case False => false + case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) + case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) + case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) + case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) +} + +def eval_aexp(a: AExp, env : Env) : Int 
= a match { + case Num(i) => i + case Var(s) => env(s) + case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) + case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) + case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) +} + +def eval_stmt(s: Stmt, env: Env) : Env = s match { + case Skip => env + case Assign(x, a) => env + (x -> eval_aexp(a, env)) + case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) + case While(b, bl) => + if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) + else env +} + +def eval_bl(bl: Block, env: Env) : Env = bl match { + case Nil => env + case s::bl => eval_bl(bl, eval_stmt(s, env)) +} + +//examples +println(eval_stmt(p3a_ast.head, Map.empty)) +println(eval_stmt(p3b_ast.head, Map.empty)) +println(eval_bl(p4_ast.head, Map.empty)) +println(eval_bl(p5_ast.head, Map.empty)) diff -r 47f86885d481 -r e85600529ca5 scala/while1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/scala/while1.scala Sat Jun 15 09:11:11 2013 -0400 @@ -0,0 +1,220 @@ +// A parser and evaluator for the WHILE language +// +import matcher._ +import parser._ + + +// some regular expressions +val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_") +val DIGIT = RANGE("0123456789") +val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) +val NUM = PLUS(DIGIT) +val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") +val SEMI: Rexp = ";" +val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") +val WHITESPACE = PLUS(RANGE(" \n")) +val RPAREN: Rexp = ")" +val LPAREN: Rexp = "(" +val BEGIN: Rexp = "{" +val END: Rexp = "}" +val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") + +// tokens for classifying the strings that have been recognised +abstract class Token +case object T_WHITESPACE extends Token +case object T_COMMENT extends Token +case object T_SEMI extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case object T_BEGIN extends Token +case object T_END extends Token +case class T_ID(s: String) extends Token +case class T_OP(s: String) extends Token +case class T_NUM(s: String) extends Token +case class T_KWD(s: String) extends Token + +val lexing_rules: List[(Rexp, List[Char] => Token)] = + List((KEYWORD, (s) => T_KWD(s.mkString)), + (ID, (s) => T_ID(s.mkString)), + (OP, (s) => T_OP(s.mkString)), + (NUM, (s) => T_NUM(s.mkString)), + (SEMI, (s) => T_SEMI), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (BEGIN, (s) => T_BEGIN), + (END, (s) => T_END), + (WHITESPACE, (s) => T_WHITESPACE), + (COMMENT, (s) => T_COMMENT)) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) + +// the abstract syntax trees +abstract class Stmt +abstract class AExp +abstract class BExp +type Block = List[Stmt] +case object Skip extends Stmt +case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt +case class While(b: BExp, bl: Block) extends Stmt +case class Assign(s: String, a: AExp) extends Stmt +case class Write(s: String) extends Stmt + +case class Var(s: String) extends AExp +case class Num(i: Int) extends AExp +case class Aop(o: String, a1: AExp, a2: AExp) extends AExp + +case object True extends BExp +case object False extends BExp +case class Bop(o: String, a1: AExp, a2: AExp) extends BExp + +// atomic parsers +case class TokParser(tok: Token) extends Parser[List[Token], Token] { + def parse(ts: List[Token]) = ts match { + case t::ts if (t == tok) => Set((t, ts)) + case _ => Set ()
} +} +implicit def token2tparser(t: Token) = TokParser(t) + +case object NumParser extends Parser[List[Token], Int] { + def parse(ts: List[Token]) = ts match { + case T_NUM(s)::ts => Set((s.toInt, ts)) + case _ => Set () + } +} + +case object IdParser extends Parser[List[Token], String] { + def parse(ts: List[Token]) = ts match { + case T_ID(s)::ts => Set((s, ts)) + case _ => Set () + } +} + + +// arithmetic expressions +lazy val AExp: Parser[List[Token], AExp] = + (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || + (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T +lazy val T: Parser[List[Token], AExp] = + (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F +lazy val F: Parser[List[Token], AExp] = + (T_LPAREN ~> AExp <~ T_RPAREN) || + IdParser ==> Var || + NumParser ==> Num + +// boolean expressions +lazy val BExp: Parser[List[Token], BExp] = + (T_KWD("true") ==> ((_) => True: BExp)) || + (T_KWD("false") ==> ((_) => False: BExp)) || + (T_LPAREN ~> BExp <~ T_RPAREN) || + (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp } + +lazy val Stmt: Parser[List[Token], Stmt] = + (T_KWD("skip") ==> ((_) => Skip: Stmt)) || + (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || + (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> + { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || + (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || + (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } + +lazy val Stmts: Parser[List[Token], Block] = + (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || + (Stmt ==> ((s) => List(s) : Block)) + +lazy val Block: Parser[List[Token], Block] = + (T_BEGIN ~> Stmts <~ T_END) || + (Stmt ==> ((s) => List(s))) + +// interpreter +type Env = Map[String, Int] + +def eval_bexp(b: BExp, env: Env) : Boolean = b match { + case True => true + case False => false + case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) + case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) + case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) +} + +def eval_aexp(a: AExp, env : Env) : Int = a match { + case Num(i) => i + case Var(s) => env(s) + case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) + case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) + case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) +} + +def eval_stmt(s: Stmt, env: Env) : Env = s match { + case Skip => env + case Assign(x, a) => env + (x -> eval_aexp(a, env)) + case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) + case While(b, bl) => + if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) + else env + case Write(x) => { println(env(x)); env } +} + +def eval_bl(bl: Block, env: Env) : Env = bl match { + case Nil => env + case s::bl => eval_bl(bl, eval_stmt(s, env)) +} + +def eval_prog(name: String) : Env = { + val tks = Tok.fromFile(name) + val ast = Stmts.parse_single(tks) + eval_bl(ast, Map.empty) +} + + +//examples + +//eval_prog("loops.while") +eval_prog("fib.while") + + +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j 
<- 1 to i) code + val end = System.nanoTime() + (end - start)/(i * 1.0e9) +} + + +val test_prog = """ +start := XXX; +x := start; +y := start; +z := start; +while 0 < x do { + while 0 < y do { + while 0 < z do { + z := z - 1 + }; + z := start; + y := y - 1 + }; + y := start; + x := x - 1 +} +""" + + + +def eval_test(n: Int) : Unit = { + val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) + val ast = Stmts.parse_single(tks) + println(n + " " + time_needed(2, eval_bl(ast, Map.empty))) +} + +List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_)) + + + + + + + diff -r 47f86885d481 -r e85600529ca5 scraper.scala --- a/scraper.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -import java.io.OutputStreamWriter -import java.net.URL -import scala.io.Source.fromInputStream - -val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp") - -//connect to url -val conn = url.openConnection -conn.setRequestProperty("User-Agent", "") -conn.setDoOutput(true) -conn.connect - -//sending data -val wr = new OutputStreamWriter(conn.getOutputStream()) -//wr.write("Fdate=2012-9-24&Tdate=2012-09-25") -//wr.write("Fdate=2012-9-18&Tdate=2012-09-25") -wr.write("Fdate=2001-5-18&Tdate=2012-09-25") -wr.flush -wr.close - -//receiving data -val page = fromInputStream(conn.getInputStream).getLines.mkString("\n") - -//println(page) - -// regular expression . excludes newlines, -// therefore we have to use [\S\s] -val regex1 = """[\S\s]*?""".r -val rows = regex1.findAllIn(page).toList - -//print(rows) - -val regex2 = """([\S\s]*?)""".r - -def aux(s: String) : Array[String] = { - for (m <- regex2.findAllIn(s).toArray) yield m match { - case regex2(value) => value.trim - } -} - -val data = rows.map { aux } - -def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt - -//day with highest particle pollution (PM_10) -data.sortWith(compare(1)).last - -//day with highest sulfur dioxide (SO_2) -data.sortWith(compare(2)).last - -//day with highest nitro dioxide (NO_2) -data.sortWith(compare(3)).last - -//days with highest PM_10 -val groups = data.groupBy(_(1).toInt) -val max_key = groups.keySet.max -groups(max_key) diff -r 47f86885d481 -r e85600529ca5 while.scala --- a/while.scala Sun Dec 23 00:38:56 2012 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,231 +0,0 @@ -// A parser and evaluator for teh while language -// -import matcher._ -import parser._ - - -// some regular expressions -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) -val NUM = PLUS(DIGIT) -val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false") -val SEMI: Rexp = ";" -val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" - -// tokens for classifying the strings that have been recognised -abstract class Token -case object T_WHITESPACE extends Token -case object T_SEMI extends Token -case object T_LPAREN extends Token -case object T_RPAREN extends Token -case object T_BEGIN extends Token -case object T_END extends Token -case class T_ID(s: String) extends Token -case class T_OP(s: String) extends Token -case class T_NUM(s: String) extends Token -case class T_KWD(s: String) extends Token - -val lexing_rules: List[(Rexp, List[Char] => Token)] = - List((KEYWORD, (s) => T_KWD(s.mkString)), - (ID, 
(s) => T_ID(s.mkString)), - (OP, (s) => T_OP(s.mkString)), - (NUM, (s) => T_NUM(s.mkString)), - (SEMI, (s) => T_SEMI), - (LPAREN, (s) => T_LPAREN), - (RPAREN, (s) => T_RPAREN), - (BEGIN, (s) => T_BEGIN), - (END, (s) => T_END), - (WHITESPACE, (s) => T_WHITESPACE)) - -// the tokenizer -val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) - -// the abstract syntax trees -abstract class Stmt -abstract class AExp -abstract class BExp -type Block = List[Stmt] -case object Skip extends Stmt -case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt -case class While(b: BExp, bl: Block) extends Stmt -case class Assign(s: String, a: AExp) extends Stmt - -case class Var(s: String) extends AExp -case class Num(i: Int) extends AExp -case class Aop(o: String, a1: AExp, a2: AExp) extends AExp - -case object True extends BExp -case object False extends BExp -case class Bop(o: String, a1: AExp, a2: AExp) extends BExp - -// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) - case _ => Set () - } -} -implicit def token2tparser(t: Token) = TokParser(t) - -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(s)::ts => Set((s.toInt, ts)) - case _ => Set () - } -} - -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () - } -} - - -// arithmetic expressions -lazy val AExp: Parser[List[Token], AExp] = - (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || - (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T -lazy val T: Parser[List[Token], AExp] = - (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F -lazy val F: Parser[List[Token], AExp] = - (T_LPAREN ~> AExp <~ T_RPAREN) || - IdParser ==> Var || - NumParser ==> Num - -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } || - (T_KWD("true") ==> ((_) => True)) || - (T_KWD("false") ==> ((_) => False: BExp)) - -lazy val Stmt: Parser[List[Token], Stmt] = - (T_KWD("skip") ==> ((_) => Skip: Stmt)) || - (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> - { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || - (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } - -lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || - (Stmt ==> ((s) => List(s) : Block)) - -lazy val Block: Parser[List[Token], Block] = - (T_BEGIN ~> Stmts <~ T_END) || - (Stmt ==> ((s) => List(s))) - - -// examples -val p1 = "x := 5" -val p1_toks = Tok.fromString(p1) -val p1_ast = Block.parse_all(p1_toks) -println(p1_toks) -println(p1_ast) - -val p1a = "{ x := 5; y := 8}" -val p1a_toks = Tok.fromString(p1a) -val p1a_ast = Block.parse_all(p1a_toks) -println(p1a_ast) - -val p2 = "5 = 6" -val p2_toks = Tok.fromString(p2) -val p2_ast = BExp.parse_all(p2_toks) -println(p2_ast) - -val p2a = "true" -val p2a_toks = 
diff -r 47f86885d481 -r e85600529ca5 while1.scala
--- a/while1.scala	Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-// A parser and evaluator for the WHILE language
-//
-import matcher._
-import parser._
-
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_COMMENT extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[(Rexp, List[Char] => Token)] =
-  List((KEYWORD, (s) => T_KWD(s.mkString)),
-       (ID, (s) => T_ID(s.mkString)),
-       (OP, (s) => T_OP(s.mkString)),
-       (NUM, (s) => T_NUM(s.mkString)),
-       (SEMI, (s) => T_SEMI),
-       (LPAREN, (s) => T_LPAREN),
-       (RPAREN, (s) => T_RPAREN),
-       (BEGIN, (s) => T_BEGIN),
-       (END, (s) => T_END),
-       (WHITESPACE, (s) => T_WHITESPACE),
-       (COMMENT, (s) => T_COMMENT))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-case class Write(s: String) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
-  def parse(ts: List[Token]) = ts match {
-    case t::ts if (t == tok) => Set((t, ts))
-    case _ => Set ()
-  }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
-  def parse(ts: List[Token]) = ts match {
-    case T_NUM(s)::ts => Set((s.toInt, ts))
-    case _ => Set ()
-  }
-}
-
-case object IdParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = ts match {
-    case T_ID(s)::ts => Set((s, ts))
-    case _ => Set ()
-  }
-}
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
-  (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
-  (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
-  (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
-  (T_LPAREN ~> AExp <~ T_RPAREN) ||
-  IdParser ==> Var ||
-  NumParser ==> Num
-
-// boolean expressions; note that a > b is parsed as the flipped b < a,
-// so the interpreter below only needs a "<" case
-lazy val BExp: Parser[List[Token], BExp] =
-  (T_KWD("true") ==> ((_) => True: BExp)) ||
-  (T_KWD("false") ==> ((_) => False: BExp)) ||
-  (T_LPAREN ~> BExp <~ T_RPAREN) ||
-  (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
-  (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
-  (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
-  (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp }
-
-lazy val Stmt: Parser[List[Token], Stmt] =
-  (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
-  (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
-  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
-    { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
-  (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
-  (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
-
-lazy val Stmts: Parser[List[Token], Block] =
-  (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
-  (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
-  (T_BEGIN ~> Stmts <~ T_END) ||
-  (Stmt ==> ((s) => List(s)))
-
-// interpreter
-type Env = Map[String, Int]
-
-def eval_bexp(b: BExp, env: Env) : Boolean = b match {
-  case True => true
-  case False => false
-  case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
-  case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
-  case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
-}
-
-def eval_aexp(a: AExp, env : Env) : Int = a match {
-  case Num(i) => i
-  case Var(s) => env(s)
-  case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
-  case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
-  case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
-}
-
-def eval_stmt(s: Stmt, env: Env) : Env = s match {
-  case Skip => env
-  case Assign(x, a) => env + (x -> eval_aexp(a, env))
-  case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
-  case While(b, bl) =>
-    if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
-    else env
-  case Write(x) => { println(env(x)); env }
-}
-
-def eval_bl(bl: Block, env: Env) : Env = bl match {
-  case Nil => env
-  case s::bl => eval_bl(bl, eval_stmt(s, env))
-}
-
-def eval_prog(name: String) : Env = {
-  val tks = Tok.fromFile(name)
-  val ast = Stmts.parse_single(tks)
-  eval_bl(ast, Map.empty)
-}
-
-
-//examples
-
-//eval_prog("loops.while")
-eval_prog("fib.while")
-
-
-def time_needed[T](i: Int, code: => T) = {
-  val start = System.nanoTime()
-  for (j <- 1 to i) code
-  val end = System.nanoTime()
-  (end - start)/(i * 1.0e9)
-}
-
-
-val test_prog = """
-start := XXX;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
-  while 0 < z do {
-    z := z - 1
-  };
-  z := start;
-  y := y - 1
- };
- y := start;
- x := x - 1
-}
-"""
-
-
-def eval_test(n: Int) : Unit = {
-  val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
-  val ast = Stmts.parse_single(tks)
-  println(n + " " + time_needed(2, eval_bl(ast, Map.empty)))
-}
-
-List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_))
-
-
-
-
-
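What the nested-loop benchmark measures can be stated precisely: since y and z are reset to start after each inner loop, the innermost decrement z := z - 1 executes exactly n^3 times for a start value n, so the times printed by eval_test should grow cubically. A minimal, library-free sketch of that count in plain Scala (no matcher/parser code needed):

// count how often the innermost body of test_prog runs; Long because
// 1600^3 overflows Int
def steps(start: Int): Long = {
  var x = start
  var count = 0L
  while (x > 0) {
    var y = start
    while (y > 0) {
      var z = start
      while (z > 0) { z -= 1; count += 1 }   // the z := z - 1 loop
      y -= 1
    }
    x -= 1
  }
  count
}

assert(steps(10) == 1000L)   // 10^3 innermost steps, i.e. cubic growth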