--- a/S_grammar-token.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-//:load matcher.scala
-//:load parser3.scala
-
-abstract class Token
-case object T_ONE extends Token
-
-val lexing_rules : List[Rule[Token]] =
- List(("1", (s: List[Char]) => T_ONE))
-
-val T = Tokenizer(lexing_rules)
-
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tokparser(t: Token) = TokParser(t)
-
-case object EmpParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = Set(("", ts))
-}
-
-
-lazy val Su: Parser[List[Token], String] =
- (T_ONE ~ Su) ==> { case (x, y) => "1" + y} || EmpParser
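-// e.g. Su.parse_all(T.fromString("11")) == Set("11")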
-
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-def test(i: Int) = {
- val result = Su.parse_all(T.fromString("1" * i))
- //print(result.size + " ")
-}
-
-
-for (i <- 1 to 1000 by 50) {
- print(i + " ")
- print("%.5f".format(time_needed(1, test(i))))
- print("\n")
-}
-
--- a/S_grammar.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,52 +0,0 @@
-//:load parser3.scala
-
-case class StringParser(s: String) extends Parser[String, String] {
- def parse(ts: String) = {
- if (s.length <= ts.length && ts.startsWith(s)) Set((s, ts.drop(s.length)))
- else Set()
- }
-}
-
-implicit def string2parser(s: String) = StringParser(s)
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-// unambiguous grammar
-
-lazy val U: Parser[String, String] =
- ("1" ~ U) ==> { case (x, y) => "1" + y} || ""
-
-def test1(i: Int) = {
- val result = U.parse_all("1" * i)
- //print(result.size + " ")
-}
-
-for (i <- 1 to 1000 by 50) {
- print(i + " ")
- print("%.5f".format(time_needed(1, test1(i))))
- print("\n")
-}
-
-
-
-// ambiguous grammar
-// n = 16 -> over 35 million parse trees
-
-lazy val S: Parser[String, String] =
- ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || ""
-
-def test2(i: Int) = {
- val result = S.parse_all("1" * i)
- print(result.size + " ")
-}
-
-for (i <- 1 to 30) {
- print(i + " ")
- print("%.5f".format(time_needed(1, test2(i))))
- print("\n")
-}
--- a/Term_grammar.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-//:load matcher.scala
-//:load parser3.scala
-
-// some regular expressions
-val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
-val ID = PLUS(LETTER)
-
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been lexed
-abstract class Token
-
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List((NUMBER, (s) => T_NUM(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-lazy val E: Parser[List[Token], Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T
-lazy val T: Parser[List[Token], Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
-lazy val F: Parser[List[Token], Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser
-
-println(E.parse_all(Tk.fromString("1 + 2 + 3")))
-println(E.parse_all(Tk.fromString("1 + 2 * 3")))
-println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
-println(E.parse_all(Tk.fromString("(14 + 2) * (3 + 2)")))
-
--- a/app0.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-import io.Source
-
-def get_page(url: String) : String = {
-  Source.fromURL(url).take(10000).mkString
-}
-
--- a/app1.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
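-// (assumes io.Source is already imported, as in app0.scala)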
-def get_page(url: String) : String = {
- try {
- Source.fromURL(url).take(10000).mkString
- }
- catch {
- case e => {
- println(" Problem with: " + url)
- ""
- }
- }
-}
-
--- a/app2.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-val http_pattern = """\"https?://[^\"]*\"""".r
-
-def unquote(s: String) = s.drop(1).dropRight(1)
-
-def get_all_URLs(page: String) : Set[String] = {
- (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
-}
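-// e.g. get_all_URLs("""a "http://foo.com" b""") == Set("http://foo.com")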
-
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else {
- println("Visiting: " + n + " " + url)
- for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
- }
-}
-
--- a/app3.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-val my_urls = """urbanc""".r
-
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else if (my_urls.findFirstIn(url) == None) ()
- else {
- println("Visiting: " + n + " " + url)
- for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
- }
-}
--- a/app4.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-val http_pattern = """\"https?://[^\"]*\"""".r
-val my_urls = """urbanc""".r
-val email_pattern =
- """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
-
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else {
- println("Visiting: " + n + " " + url)
- val page = get_page(url)
- println(email_pattern.findAllIn(page).mkString("\n"))
- for (u <- get_all_URLs(page)) crawl(u, n - 1)
- }
-}
--- a/app5.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
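-// examples (using the Rexp constructors from app51.scala):
-// nullable(CHAR('a')) == false, nullable(STAR(CHAR('a'))) == true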
--- a/app51.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
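-// e.g. the regular expression ab + ε is written as ALT(SEQ(CHAR('a'), CHAR('b')), EMPTY)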
--- a/app6.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-def deriv (r: Rexp, c: Char) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c))
- else SEQ(deriv(r1, c), r2)
- case STAR(r) => SEQ(deriv(r, c), STAR(r))
-}
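-// e.g. deriv(SEQ(CHAR('a'), CHAR('b')), 'a') == SEQ(EMPTY, CHAR('b'))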
-
--- a/app7.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-abstract class Parser[I, T] {
- def parse(ts: I): Set[(T, I)]
-
- def parse_all(ts: I) : Set[T] =
- for ((head, tail) <- parse(ts); if (tail.isEmpty))
- yield head
-
- def || (right : => Parser[I, T]) : Parser[I, T] =
- new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [I, S] =
- new FunParser(this, f)
- def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] =
- new SeqParser(this, right)
-}
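-// a minimal atomic parser, as a sketch (an assumption, not in the original
-// file); note that parse_all above needs the input type I to provide
-// isEmpty, which the compiled parser3.scala ensures via the bound I <% Seq[_]
-case class CharParser(c: Char) extends Parser[String, Char] {
- def parse(ts: String) =
- if (ts.nonEmpty && ts.head == c) Set((c, ts.tail)) else Set()
-}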
-
-
--- a/app8.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-class SeqParser[I, T, S](p: => Parser[I, T],
- q: => Parser[I, S])
- extends Parser[I, (T, S)] {
- def parse(sb: I) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1))
- yield ((head1, head2), tail2)
-}
-
-class AltParser[I, T](p: => Parser[I, T],
- q: => Parser[I, T])
- extends Parser[I, T] {
- def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
-}
-
-class FunParser[I, T, S](p: => Parser[I, T], f: T => S)
- extends Parser[I, S] {
- def parse(sb: I) =
- for ((head, tail) <- p.parse(sb))
- yield (f(head), tail)
-}
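-// usage sketch, with the CharParser sketched in app7.scala:
-// (CharParser('a') ~ CharParser('b')).parse("abc") == Set((('a', 'b'), "c"))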
-
-
--- a/automata.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-
-// a class for deterministic finite automata,
-// the type of states is kept polymorphic
-
-case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) {
-
- // the transition function lifted to list of characters
- def deltas(q: A, cs: List[Char]) : Either[A, String] =
- if (states.contains(q)) cs match {
- case Nil => Left(q)
- case c::cs =>
- if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs)
- else Right(q + " does not have a transition for " + c)
- }
- else Right(q + " is not a state of the automaton")
-
- // whether a string is accepted by the automaton
- def accepts(s: String) = deltas(start, s.toList) match {
- case Left(q) => fins.contains(q)
- case _ => false
- }
-}
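-// a small sketch (not in the original file): an automaton over the states
-// {0, 1} accepting strings with an even number of a's
-// val Even = Automaton(0, Set(0, 1), Map((0, 'a') -> 1, (1, 'a') -> 0), Set(0))
-// Even.accepts("aa") == true, Even.accepts("a") == false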
-
-
-// translating a regular expression into a finite
-// automaton
-
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-implicit def string2rexp(s : String) = {
- def chars2rexp (cs: List[Char]) : Rexp = cs match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::cs => SEQ(CHAR(c), chars2rexp(cs))
- }
- chars2rexp(s.toList)
-}
-
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-def der (r: Rexp, c: Char) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(r1, c), der(r2, c))
- case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c))
- else SEQ(der(r1, c), r2)
- case STAR(r) => SEQ(der(r, c), STAR(r))
-}
-
-
-// Here we construct an automaton whose
-// states are regular expressions
-type State = Rexp
-type States = Set[State]
-type Transition = Map[(State, Char), State]
-
-// we use as an alphabet all lowercase letters
-val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet
-
-def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = {
- val q_der : State = der(q, c)
- if (qs.contains(q_der)) (qs, delta + ((q, c) -> q_der))
- else explore(qs + q_der, delta + ((q, c) -> q_der), q_der)
-}
-
-def explore (qs: States, delta: Transition, q: State) : (States, Transition) =
- alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2))
-
-
-def mk_automaton (r: Rexp) : Automaton[Rexp] = {
- val (qs, delta) = explore(Set(r), Map(), r);
- val fins = for (q <- qs if nullable(q)) yield q;
- Automaton[Rexp](r, qs, delta, fins)
-}
-
-val A = mk_automaton(ALT("ab","ac"))
-
-A.start
-A.states.toList.length
-
-println(A.accepts("bd"))
-println(A.accepts("ab"))
-println(A.accepts("ac"))
-
-val r1 = STAR(ALT("a","b"))
-val r2 = SEQ("b","b")
-val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a")
-val B = mk_automaton(r3)
-
-B.start
-B.states.toList.length
--- a/compile.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,326 +0,0 @@
-// A parser and evaluator for the while language
-//
-import matcher._
-import parser._
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_COMMENT extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[(Rexp, List[Char] => Token)] =
- List((KEYWORD, (s) => T_KWD(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (OP, (s) => T_OP(s.mkString)),
- (NUM, (s) => T_NUM(s.mkString)),
- (SEMI, (s) => T_SEMI),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (BEGIN, (s) => T_BEGIN),
- (END, (s) => T_END),
- (WHITESPACE, (s) => T_WHITESPACE),
- (COMMENT, (s) => T_COMMENT))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-case class Write(s: String) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
- }
-}
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
- (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
- (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
- (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
- (T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> Var ||
- NumParser ==> Num
-
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (T_KWD("true") ==> ((_) => True: BExp)) ||
- (T_KWD("false") ==> ((_) => False: BExp)) ||
- (T_LPAREN ~> BExp <~ T_RPAREN) ||
- (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } ||
- (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } ||
- (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } ||
- (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp }
-
-lazy val Stmt: Parser[List[Token], Stmt] =
- (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
- (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
- { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
- (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
- (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
-
-lazy val Stmts: Parser[List[Token], Block] =
- (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
- (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
- (T_BEGIN ~> Stmts <~ T_END) ||
- (Stmt ==> ((s) => List(s)))
-
-// compiler
-val beginning = """
-.class public XXX.XXX
-.super java/lang/Object
-
-.method public <init>()V
- aload_0
- invokenonvirtual java/lang/Object/<init>()V
- return
-.end method
-
-.method public static write(I)V
- .limit locals 5
- .limit stack 5
- iload 0
- getstatic java/lang/System/out Ljava/io/PrintStream;
- swap
- invokevirtual java/io/PrintStream/println(I)V
- return
-.end method
-
-
-.method public static main([Ljava/lang/String;)V
- .limit locals 200
- .limit stack 200
-
-"""
-
-val ending = """
-
- return
-
-.end method
-"""
-
-// for generating new labels
-var counter = -1
-
-def Fresh(x: String) = {
- counter += 1
- x ++ "_" ++ counter.toString()
-}
-
-type Env = Map[String, String]
-type Instrs = List[String]
-
-def compile_aexp(a: AExp, env : Env) : Instrs = a match {
- case Num(i) => List("ldc " + i.toString + "\n")
- case Var(s) => List("iload " + env(s) + "\n")
- case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
- case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
- case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
-}
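-// e.g. compile_aexp(Aop("+", Num(1), Num(2)), Map()) ==
-//   List("ldc 1\n", "ldc 2\n", "iadd\n")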
-
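-// compiles a boolean expression so that control falls through when it is
-// true and jumps to the label jmp when it is false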
-def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
- case True => Nil
- case False => List("goto " + jmp + "\n")
- case Relop("=", a1, a2) =>
- compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
- case Relop("!=", a1, a2) =>
- compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
- case Relop("<", a1, a2) =>
- compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
-}
-
-
-def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
- case Skip => (Nil, env)
- case Assign(x, a) => {
- val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
- (compile_aexp(a, env) ++
- List("istore " + index + "\n"), env + (x -> index))
- }
- case If(b, bl1, bl2) => {
- val if_else = Fresh("If_else")
- val if_end = Fresh("If_end")
- val (instrs1, env1) = compile_bl(bl1, env)
- val (instrs2, env2) = compile_bl(bl2, env1)
- (compile_bexp(b, env, if_else) ++
- instrs1 ++
- List("goto " + if_end + "\n") ++
- List("\n" + if_else + ":\n\n") ++
- instrs2 ++
- List("\n" + if_end + ":\n\n"), env2)
- }
- case While(b, bl) => {
- val loop_begin = Fresh("Loop_begin")
- val loop_end = Fresh("Loop_end")
- val (instrs1, env1) = compile_bl(bl, env)
- (List("\n" + loop_begin + ":\n\n") ++
- compile_bexp(b, env, loop_end) ++
- instrs1 ++
- List("goto " + loop_begin + "\n") ++
- List("\n" + loop_end + ":\n\n"), env1)
- }
- case Write(x) =>
- (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
-}
-
-def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
- case Nil => (Nil, env)
- case s::bl => {
- val (instrs1, env1) = compile_stmt(s, env)
- val (instrs2, env2) = compile_bl(bl, env1)
- (instrs1 ++ instrs2, env2)
- }
-}
-
-def compile(input: String) : String = {
- val class_name = input.split('.')(0)
- val tks = Tok.fromFile(input)
- val ast = Stmts.parse_single(tks)
- val instructions = compile_bl(ast, Map.empty)._1
- (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
-}
-
-
-def compile_to(input: String, output: String) = {
- val fw = new java.io.FileWriter(output)
- fw.write(compile(input))
- fw.close()
-}
-
-//
-val tks = Tok.fromString("x := x + 1")
-val ast = Stmt.parse_single(tks)
-println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
-
-
-
-//examples
-
-compile_to("loops.while", "loops.j")
-//compile_to("fib.while", "fib.j")
-
-
-// testing cases for time measurements
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-// for testing
-import scala.sys.process._
-
-val test_prog = """
-start := XXX;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-};
-write x;
-write y;
-write z
-"""
-
-
-def compile_test(n: Int) : Unit = {
- val class_name = "LOOP"
- val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
- val ast = Stmts.parse_single(tks)
- val instructions = compile_bl(ast, Map.empty)._1
- val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
- val fw = new java.io.FileWriter(class_name + ".j")
- fw.write(assembly)
- fw.close()
- val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
- println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
-}
-
-List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
-
-
-
-// Java bytecode assembler
-//
-// java -jar jvm/jasmin-2.4/jasmin.jar loops.j
-
-
-
-
-
-
--- a/crawler.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,144 +0,0 @@
-import io.Source
-import scala.util.matching.Regex
-
-// gets the first ~10K of a page
-def get_page(url: String) : String = {
- try {
- Source.fromURL(url).take(10000).mkString
- }
- catch {
- case e => {
- println(" Problem with: " + url)
- ""
- }
- }
-}
-
-// non-existing page -> returns the empty string
-get_page("""http://www.foobar.com""")
-
-
-// starting URL for the crawler
-val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
-
-// starts with an "
-// then either http or https
-// then ://
-// then any character that is not "
-// finally "
-val http_pattern = """\"((?:http|https)://(?:[^\"])*)\"""".r
-val http_pattern = """\"(https?://[^\"]*)\"""".r
-
-def unquote(s: String) = s.drop(1).dropRight(1)
-
-def get_all_URLs(page: String) : Set[String] = {
- (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
-}
-
-// get all urls in startURL
-get_all_URLs(get_page(startURL))
-
-// number of all urls in startURL
-get_all_URLs(get_page(startURL)).toList.length
-
-
-// naive version - searches until a given depth
-// visits pages potentially more than once
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else {
- println("Visiting: " + n + " " + url)
- for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
- }
-}
-
-crawl(startURL, 2)
-
-
-//breadth-first version without visiting
-//pages twice
-def bf_crawl(todo: Set[String], visited: Set[String], n: Int) : Unit = {
- if (n == 0) ()
- else {
- val new_todo = todo.flatMap {
- url => {
- if (visited.contains(url)) Set[String]()
- else {
- println("Visiting: " + n + " " + url)
- get_all_URLs(get_page(url))
- }
- }
- }
- bf_crawl(new_todo, visited union todo, n - 1)
- }
-}
-
-bf_crawl(Set(startURL), Set(), 2)
-
-
-//breadth-first version without visiting
-//pages twice and only in "my" domain
-val my_pattern = """urbanc""".r
-
-// breadth first search avoiding double searches
-def bf_crawl2(todo: Set[String], visited: Set[String], n: Int) : Unit = {
- if (n == 0) ()
- else {
- val new_todo = todo.flatMap {
- url => {
- if (visited.contains(url)) Set[String]()
- else if (my_pattern.findFirstIn(url) == None) Set[String]()
- else {
- println("Visiting: " + n + " " + url);
- get_all_URLs(get_page(url))
- }
- }
- }
- bf_crawl2(new_todo, visited union todo, n - 1)
- }
-}
-
-bf_crawl2(Set(startURL), Set(), 5)
-
-// email harvester
-// from
-// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/
-
-val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
-
-def bf_crawl3(todo: Set[String], visited: Set[String], n: Int) : Unit = {
- if (n == 0) ()
- else {
- val new_todo = todo.flatMap {
- url => {
- if (visited.contains(url)) Set[String]()
- else {
- println("Visiting: " + n + " " + url);
- val page = get_page(url)
- println(email_pattern.findAllIn(page).mkString("\n"))
- get_all_URLs(get_page(url))
- }
- }
- }
- bf_crawl3(new_todo, visited union todo, n - 1)
- }
-}
-
-bf_crawl3(Set(startURL), Set(), 3)
-
-
-// depth-first version does not work well,
-// because it marks pages as visited at
-// depth 1 even though they might still
-// need to be explored at depth 2
-var visited = Set("")
-
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else if (visited.contains(url)) () //println("Already visited: " + n + " " + url)
- else {
- println("Visiting: " + n + " " + url);
- visited += url
- for (u <- getAllURLs(getURLpage(url))) crawl(u, n - 1);
- }
-}
--- a/crawler1.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-import io.Source
-import scala.util.matching.Regex
-
-// gets the first ~10K of a page
-def get_page(url: String) : String = {
- try {
- Source.fromURL(url).take(10000).mkString
- }
- catch {
- case e => {
- println(" Problem with: " + url)
- ""
- }
- }
-}
-
-
-// regex for URLs
-val http_pattern = """\"https?://[^\"]*\"""".r
-
-def unquote(s: String) = s.drop(1).dropRight(1)
-
-def get_all_URLs(page: String) : Set[String] = {
- (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
-}
-
-// naive version - searches until a given depth
-// visits pages potentially more than once
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else {
- println("Visiting: " + n + " " + url)
- for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
- }
-}
-
-// starting URL for the crawler
-val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
-//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/"""
-
-
-// call on the command line
-crawl(startURL, 2)
-
-crawl("""http://www.dcs.kcl.ac.uk/staff/urbanc/msc-projects-12.html""", 2)
--- a/crawler2.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-import io.Source
-import scala.util.matching.Regex
-
-// gets the first ~10K of a page
-def get_page(url: String) : String = {
- try {
- Source.fromURL(url).take(10000).mkString
- }
- catch {
- case e => {
- println(" Problem with: " + url)
- ""
- }
- }
-}
-
-// starting URL for the crawler
-val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
-
-// regex for URLs
-val http_pattern = """\"https?://[^\"]*\"""".r
-val my_urls = """urbanc""".r
-
-def unquote(s: String) = s.drop(1).dropRight(1)
-
-def get_all_URLs(page: String) : Set[String] = {
- (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
-}
-
-// naive version - searches until a given depth
-// visits pages potentially more than once
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- else if (my_urls.findFirstIn(url) == None) ()
- else {
- println("Visiting: " + n + " " + url)
- for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
- }
-}
-
-// can now deal with depth 4
-// start on command line
-crawl(startURL, 4)
-
--- a/crawler3.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-import io.Source
-import scala.util.matching.Regex
-
-// gets the first ~10K of a page
-def get_page(url: String) : String = {
- try {
- Source.fromURL(url).take(10000).mkString
- }
- catch {
- case e => {
- println(" Problem with: " + url)
- ""
- }
- }
-}
-
-// starting URL for the crawler
-val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
-
-// regex for URLs
-val http_pattern = """\"https?://[^\"]*\"""".r
-val my_urls = """urbanc""".r
-val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
-
-// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/
-
-def unquote(s: String) = s.drop(1).dropRight(1)
-
-def get_all_URLs(page: String) : Set[String] = {
- (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
-}
-
-// naive version - searches until a given depth
-// visits pages potentially more than once
-def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
- //else if (my_urls.findFirstIn(url) == None) ()
- else {
- println("Visiting: " + n + " " + url)
- val page = get_page(url)
- println(email_pattern.findAllIn(page).mkString("\n"))
- for (u <- get_all_URLs(page)) crawl(u, n - 1)
- }
-}
-
-// can now deal with depth 3
-// start on command line
-crawl(startURL, 3)
-
--- a/html.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-
-//:load matcher.scala
-
-// some regular expressions
-val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
-val WORD = PLUS(SYM)
-
-val BTAG = SEQS("<", WORD, ">")
-val ETAG = SEQS("</", WORD, ">")
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-
-// for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case class T_WORD(s: String) extends Token
-case class T_ETAG(s: String) extends Token
-case class T_BTAG(s: String) extends Token
-case class T_NT(s: String, rhs: List[Token]) extends Token
-
-val lexing_rules: List[Rule[Token]] =
- List((BTAG, (s) => T_BTAG(s.mkString)),
- (ETAG, (s) => T_ETAG(s.mkString)),
- (WORD, (s) => T_WORD(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE))
-
-// the tokenizer
-val T = Tokenizer(lexing_rules)
-
-// width for printing
-val WIDTH = 60
-
-
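-// interprets the token stream as formatted console output: words are
-// wrapped at WIDTH characters and tags are turned into ANSI codes
-// (via Console); ctr stacks the currently open formatting codes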
-def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit= ts match {
- case Nil => println(Console.RESET)
- case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
- case T_WORD(s)::rest => {
- val newstr = Console.RESET + ctr.reverse.mkString + s
- if (c + s.length < WIDTH) {
- print(newstr);
- interpret(rest, c + s.length, ctr)
- }
- else {
- print("\n" + newstr)
- interpret(rest, s.length, ctr)
- }
- }
- case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
- case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
- case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
- case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
- case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
- case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
- case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
- case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
- case _::rest => interpret(rest, c, ctr)
-}
-
-val test_string = """
-<b>MSc Projects</b>
-
-<p>
-start of paragraph. <cyan> a <red>cyan</red> word</cyan> normal again something longer.
-</p>
-
-
- <p><b>Description:</b>
- <a>Regular expressions</a> are extremely useful for many text-processing tasks such as finding patterns in texts,
- lexing programs, syntax highlighting and so on. Given that regular expressions were
- introduced in 1950 by <a>Stephen Kleene</a>, you might think
- regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still
- an active research area. For example
- <a>this paper</a>
- about regular expression matching and partial derivatives was presented this summer at the international
- PPDP'12 conference. The task in this project is to implement the results from this paper.</p>
-
- <p>The background for this project is that some regular expressions are
- <a>evil</a>
- and can stab you in the back; according to
- this <a>blog post</a>.
- For example, if you use in <a>Python</a> or
- in <a>Ruby</a> (probably also in other mainstream programming languages) the
- innocently looking regular expression a?{28}a{28} and match it, say, against the string
- <red>aaaaaaaaaa<cyan>aaaaaaa</cyan>aaaaaaaaaaa</red> (that is 28 as), you will soon notice that your CPU usage goes to 100%. In fact,
- Python and Ruby need approximately 30 seconds of hard work for matching this string. You can try it for yourself:
- <a>re.py</a> (Python version) and
- <a>re.rb</a>
- (Ruby version). You can imagine an attacker
- mounting a nice <a>DoS attack</a> against
- your program if it contains such an evil regular expression. Actually
- <a>Scala</a> (and also Java) are almost immune from such
- attacks as they can deal with strings of up to 4,300 as in less than a second. But if you scale
- the regular expression and string further to, say, 4,600 as, then you get a
- StackOverflowError
- potentially crashing your program.
- </p>
-"""
-
-interpret(T.fromString(test_string), 0, Nil)
--- a/matcher.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-package object matcher {
-
-// regular expressions
-// including constructors for NOT and ALLC
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case object ALLC extends Rexp // recognises any character
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp // negation of a regular expression
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case ALLC => false
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise any more strings (used for an early exit when munching)
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case ALLC => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
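-// e.g. no_more(NULL) == true, but no_more(EMPTY) == false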
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case ALLC => EMPTY
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-// main class for the tokenizer
-case class Tokenizer[T](rules: List[(Rexp, List[Char] => T)], excl: List[T] = Nil) {
-
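-// munch tries to match the longest possible prefix of the input s with the
-// regular expression r; t accumulates the characters read so far, and the
-// result is the remaining input together with the token built by action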
-def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, action(t))
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), action, s, t ::: List(c))
- }
-
-def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
- val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
- if (somes == Nil) Right(s.mkString)
- else Left(somes sortBy (_._1.length) head)
-}
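-// among all rules that match, the one consuming the most input wins
-// (the sort is stable, so ties go to the rule listed first)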
-
-def tokenize(cs: List[Char]) : List[T] = cs match {
- case Nil => Nil
- case _ => one_token(cs) match {
- case Left((rest, token)) => token :: tokenize(rest)
- case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
- }
-}
-
-def fromString(s: String) : List[T] =
- tokenize(s.toList).filterNot(excl.contains(_))
-
-def fromFile(name: String) : List[T] =
- fromString(io.Source.fromFile(name).mkString)
-
-}
-
-
-// regular expression for specifying
-// ranges of characters
-def Range(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), Range(s))
-}
-def RANGE(s: String) = Range(s.toList)
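-// e.g. RANGE("abc") == ALT(CHAR('a'), ALT(CHAR('b'), CHAR('c')))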
-
-
-// one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-// many alternatives
-def Alts(rs: List[Rexp]) : Rexp = rs match {
- case Nil => NULL
- case r::Nil => r
- case r::rs => ALT(r, Alts(rs))
-}
-def ALTS(rs: Rexp*) = Alts(rs.toList)
-
-// repetitions
-def Seqs(rs: List[Rexp]) : Rexp = rs match {
- case Nil => NULL
- case r::Nil => r
- case r::rs => SEQ(r, Seqs(rs))
-}
-def SEQS(rs: Rexp*) = Seqs(rs.toList)
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-}
--- a/parser1.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-// A naive bottom-up parser with backtracking
-//
-// Needs:
-// :load matcher.scala
-
-// some regular expressions
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_NUM extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case class NT(s: String) extends Token
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List((NUMBER, (s) => T_NUM),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-type Grammar = List[(String, List[Token])]
-
-// grammar for arithmetic expressions
-val grammar =
- List ("F" -> List(T_NUM),
- "E" -> List(T_NUM),
- "E" -> List(NT("E"), T_OP("+"), NT("E")),
- "E" -> List(NT("E"), T_OP("-"), NT("E")),
- "E" -> List(NT("E"), T_OP("*"), NT("E")),
- "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
-
-
-def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
- ts1 match {
- case Nil => None
- case t::ts =>
- if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
- else chop(ts, prefix, t::ts2)
- }
-
-// examples for chop
-chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) // => Some((List(1, 2, 3), List(6, 7, 8, 9)))
-chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) // => None
-
-def replace[A](ts: List[A], out: List[A], in: List[A]) =
- chop(ts, out, Nil) match {
- case None => None
- case Some((before, after)) => Some(before ::: in ::: after)
- }
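-// e.g. replace(List(1, 2, 3), List(2), List(9, 9)) == Some(List(1, 9, 9, 3))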
-
-def parse(g: Grammar, ts: List[Token]) : Boolean = {
- println(ts)
- if (ts == List(NT("E"))) true
- else {
- val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
- tss.flatten.exists(parse(g, _))
- }
-}
-
-def parser(g: Grammar, s: String) = {
- println("\n")
- parse(g, Tok.fromString(s))
-}
-
-
-
-parser(grammar, "2 + 3 * 4 + 1")
-parser(grammar, "(2 + 3) * (4 + 1)")
-parser(grammar, "(2 + 3) * 4 (4 + 1)")
-
-
-
--- a/parser2.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-// A naive version of parser combinators producing parse trees
-//
-// Needs
-// :load matcher.scala
-
-// some regular expressions
-val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
-val ID = PLUS(LETTER)
-
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-abstract class Token
-
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_IF extends Token
-case object T_THEN extends Token
-case object T_ELSE extends Token
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List(("if", (s) => T_IF),
- ("then", (s) => T_THEN),
- ("else", (s) => T_ELSE),
- (NUMBER, (s) => T_NUM(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-
-// parse trees
-abstract class ParseTree
-case class Leaf(t: Token) extends ParseTree
-case class Branch(pts: List[ParseTree]) extends ParseTree
-
-def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match {
- case Leaf(t) => Branch(List(Leaf(t), pt2))
- case Branch(pts) => Branch(pts ++ List(pt2))
-}
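-// combine flattens to the left,
-// e.g. combine(Branch(List(p1, p2)), p3) == Branch(List(p1, p2, p3))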
-
-// parser combinators
-abstract class Parser {
- def parse(ts: List[Token]): Set[(ParseTree, List[Token])]
-
- def parse_all(ts: List[Token]) : Set[ParseTree] =
- for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
-
- def || (right : => Parser) : Parser = new AltParser(this, right)
- def ~ (right : => Parser) : Parser = new SeqParser(this, right)
-}
-
-class AltParser(p: => Parser, q: => Parser) extends Parser {
- def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts)
-}
-
-class SeqParser(p: => Parser, q: => Parser) extends Parser {
- def parse(ts: List[Token]) =
- for ((head1, tail1) <- p.parse(ts);
- (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2)
-}
-
-class ListParser(ps: => List[Parser]) extends Parser {
- def parse(ts: List[Token]) = ps match {
- case Nil => Set()
- case p::Nil => p.parse(ts)
- case p::ps =>
- for ((head1, tail1) <- p.parse(ts);
- (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2)
- }
-}
-
-case class TokParser(tok: Token) extends Parser {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((Leaf(t), ts))
- case _ => Set ()
- }
-}
-
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object IdParser extends Parser {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts))
- case _ => Set ()
- }
-}
-
-case object NumParser extends Parser {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts))
- case _ => Set ()
- }
-}
-
-lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol
-lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
-lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
-
-println(Tok.fromString("1 + 2 + 3"))
-println(E.parse_all(Tok.fromString("1 + 2 + 3")))
-
-def eval(t: ParseTree) : Int = t match {
- case Leaf(T_NUM(n)) => n.toInt
- case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
- case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
- case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)
-}
-
-(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
-(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
-
-lazy val EXPR: Parser =
- new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||
- new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||
- IdParser
-
-println(EXPR.parse_all(Tok.fromString("if a then b else c")))
-println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))
-
-
-
-
--- a/parser2a.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-// Parser combinators including semantic actions
-// parses lists of tokens
-//
-// Needs
-// :load matcher.scala
-
-// some regular expressions
-val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
-val ID = PLUS(LETTER)
-
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-abstract class Token
-
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_IF extends Token
-case object T_THEN extends Token
-case object T_ELSE extends Token
-
-// lexing rules for arithmetic expressions
-val lexing_rules: List[Rule[Token]]=
- List(("if", (s) => T_IF),
- ("then", (s) => T_THEN),
- ("else", (s) => T_ELSE),
- (NUMBER, (s) => T_NUM(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-// parser combinators with return type T
-abstract class Parser[T] {
- def parse(ts: List[Token]): Set[(T, List[Token])]
-
- def parse_all(ts: List[Token]) : Set[T] =
- for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
-
- def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
- def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2)
- def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1)
-
-}
-
-class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
- def parse(sb: List[Token]) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
-}
-
-class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
- def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb)
-}
-
-class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
- def parse (sb: List[Token]) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
-}
-
-
-case class TokParser(tok: Token) extends Parser[Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T
-lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
-lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser
-
-println(E.parse_all(Tok.fromString("1 + 2 + 3")))
-println(E.parse_all(Tok.fromString("1 + 2 * 3")))
-println(E.parse_all(Tok.fromString("(1 + 2) * 3")))
-
-// Exercise: implement minus
-println(E.parse_all(Tok.fromString("(1 - 2) * 3")))
-println(E.parse_all(Tok.fromString("(1 + 2) * - 3")))
--- a/parser3.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-package object parser {
-
-// parser combinators
-// with input type I and return type T
-//
-// needs to be compiled with scalac parser3.scala
-
-abstract class Parser[I <% Seq[_], T] {
- def parse(ts: I): Set[(T, I)]
-
- def parse_all(ts: I) : Set[T] =
- for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
-
- def parse_single(ts: I) : T = parse_all(ts).toList match {
- case t::Nil => t
- case _ => { println ("Parse Error") ; sys.exit(-1) }
- }
-
- def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
- def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
- def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
-}
-
-class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
- def parse(sb: I) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
-}
-
-class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
-}
-
-class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
- def parse(sb: I) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
-}
-
-}
--- a/parser4.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-
-// parser combinators over SubString inputs with return type T
-
-case class SubString(s: String, l: Int, h: Int) {
- def low = l
- def high = h
- def length = h - l
- def substring(l: Int = l, h: Int = h) = s.slice(l, h)
- def set(low: Int = l, high: Int = h) = SubString(s, low, high)
-
-}
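-// e.g. SubString("hello", 1, 3).substring() == "el"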
-
-type Ctxt = List[(String, SubString)]
-
-abstract class Parser[T] {
-
- def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)]
-
- def parse_all(s: String) : Set[T] =
- for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head
-
- def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
- def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
-}
-
-class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
- def parse(sb: SubString, ctxt: Ctxt) =
- for ((head1, tail1) <- p.parse(sb, ctxt);
- (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
-}
-
-class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
- def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
-}
-
-class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
- def parse(sb: SubString, ctxt: Ctxt) =
- for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
-}
-
-case class SubStringParser(s: String) extends Parser[SubString] {
- val n = s.length
- def parse(sb: SubString, ctxt: Ctxt) = {
- if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s)
- Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n)))
- else Set()
- }
-}
-
-implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring())
-
-class IgnLst[T](p: => Parser[T]) extends Parser[T] {
- def parse(sb: SubString, ctxt: Ctxt) = {
- if (sb.length == 0) Set()
- else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
- yield (head, tail.set(high = tail.high + 1))
- }
-}
-
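-// CHECK guards against left-recursion: if the same nonterminal nt is tried
-// again on the same substring, IgnLst forces the input to shrink by one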
-class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] {
- def parse(sb: SubString, ctxt: Ctxt) = {
- val should_trim = ctxt.contains (nt, sb)
- if (should_trim && sb.length == 0) Set()
- else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
- else p.parse(sb, (nt, sb)::ctxt)
- }
-}
-
-// ambiguous grammar
-lazy val E: Parser[Int] =
- new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} ||
- (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
- ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} ||
- "0" ==> { (s) => 0 } ||
- "1" ==> { (s) => 1 } ||
- "2" ==> { (s) => 2 } ||
- "3" ==> { (s) => 3 })
-
-println(E.parse_all("1+2*3+3"))
-
-
--- a/parser5.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
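-// Needs
-// :load matcher.scala
-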
-val DIGIT = RANGE("0123456789")
-val NONZERODIGIT = RANGE("123456789")
-
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val LPAREN = CHAR('(')
-val RPAREN = CHAR(')')
-val WHITESPACE = PLUS(RANGE(" \n"))
-val OPS = RANGE("+-*")
-
-// for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case class T_NUM(s: String) extends Token
-case class T_OP(s: String) extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-
-val lexing_rules: List[Rule[Token]]=
- List((NUMBER, (s) => T_NUM(s.mkString)),
- (WHITESPACE, (s) => T_WHITESPACE),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (OPS, (s) => T_OP(s.mkString)))
-
-val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-
-// parser combinators with input type I and return type T
-// and a check against left-recursion
-
-case class SubList[T](s: List[T], l: Int, h: Int) {
- def low = l
- def high = h
- def length = h - l
- def sublist(l: Int = l, h: Int = h) = s.slice(l, h)
- def set(low: Int = l, high: Int = h) = SubList(s, low, high)
-}
-
-type Ctxt[T] = List[(String, SubList[T])]
-
-abstract class Parser[I, T] {
-
- def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])]
-
- def parse_all(s: List[I]) : Set[T] =
- for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head
-
- def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
- def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
- def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
- def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
- def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
-}
-
-class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) =
- for ((head1, tail1) <- p.parse(sb, ctxt);
- (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
-}
-
-class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
-}
-
-class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) =
- for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
-}
-
-case object NumParser extends Parser[Token, Int] {
- def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
- if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match {
- case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1)))
- case _ => Set()
- }
- else Set()
- }
-}
-
-case class TokParser(t: Token) extends Parser[Token, Token] {
- def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
- if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1)))
- else Set()
- }
-}
-
-implicit def token2tparser(t: Token) = TokParser(t)
-
-class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
- if (sb.length == 0) Set()
- else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
- yield (head, tail.set(high = tail.high + 1))
- }
-}
-
-class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] {
- def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
- val should_trim = ctxt.contains (nt, sb)
- if (should_trim && sb.length == 0) Set()
- else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
- else p.parse(sb, (nt, sb)::ctxt)
- }
-}
-
-lazy val E: Parser[Token, Int] =
- new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} ||
- (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} ||
- (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} ||
- NumParser)
-
-println(E.parse_all(Tk.fromString("1 + 2 * 3")))
-println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
--- a/re-alt.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-trait RegExp {
- def nullable: Boolean
- def derive(c: Char): RegExp
-}
-
-case object Empty extends RegExp {
- def nullable = false
- def derive(c: Char) = Empty
-}
-
-case object Eps extends RegExp {
- def nullable = true
- def derive(c: Char) = Empty
-}
-
-case class Str(s: String) extends RegExp {
- def nullable = s.isEmpty
- def derive(c: Char) =
- if (s.isEmpty || s.head != c) Empty
- else Str(s.tail)
-}
-
-case class Cat(r: RegExp, s: RegExp) extends RegExp {
- def nullable = r.nullable && s.nullable
- def derive(c: Char) =
- if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c))
- else Cat(r.derive(c), s)
-}
-
-case class Star(r: RegExp) extends RegExp {
- def nullable = true
- def derive(c: Char) = Cat(r.derive(c), this)
-}
-
-case class Or(r: RegExp, s: RegExp) extends RegExp {
- def nullable = r.nullable || s.nullable
- def derive(c: Char) = Or(r.derive(c), s.derive(c))
-}
-
-case class And(r: RegExp, s: RegExp) extends RegExp {
- def nullable = r.nullable && s.nullable
- def derive(c: Char) = And(r.derive(c), s.derive(c))
-}
-
-case class Not(r: RegExp) extends RegExp {
- def nullable = !r.nullable
- def derive(c: Char) = Not(r.derive(c))
-}
-
-
-
-
-object Matcher {
- def matches(r: RegExp, s: String): Boolean = {
- if (s.isEmpty) r.nullable
- else matches(r.derive(s.head), s.tail)
- }
-}
-
-
-object Pimps {
- implicit def string2RegExp(s: String) = Str(s)
-
- implicit def regExpOps(r: RegExp) = new {
- def | (s: RegExp) = Or(r, s)
- def & (s: RegExp) = And(r, s)
- def % = Star(r)
- def %(n: Int) = rep(r, n)
- def ? = Or(Eps, r)
- def ! = Not(r)
- def ++ (s: RegExp) = Cat(r, s)
- def ~ (s: String) = Matcher.matches(r, s)
- }
-
- implicit def stringOps(s: String) = new {
- def | (r: RegExp) = Or(s, r)
- def | (r: String) = Or(s, r)
- def & (r: RegExp) = And(s, r)
- def & (r: String) = And(s, r)
- def % = Star(s)
- def % (n: Int) = rep(Str(s), n)
- def ? = Or(Eps, s)
- def ! = Not(s)
- def ++ (r: RegExp) = Cat(s, r)
- def ++ (r: String) = Cat(s, r)
- def ~ (t: String) = Matcher.matches(s, t)
- }
-
- def rep(r: RegExp, n: Int): RegExp =
- if (n <= 0) Star(r)
- else Cat(r, rep(r, n - 1))
-}
-
-
-object Test {
- import Pimps._
-
- val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
- val int = ("+" | "-").? ++ digit.%(1)
- val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).?
-
- def main(args: Array[String]) {
- val ints = List("0", "-4534", "+049", "99")
- val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
- val errs = List("", "-", "+", "+-1", "-+2", "2-")
-
- ints.foreach(s => assert(int ~ s))
- reals.foreach(s => assert(!(int ~ s)))
- errs.foreach(s => assert(!(int ~ s)))
-
- ints.foreach(s => assert(real ~ s))
- reals.foreach(s => assert(real ~ s))
- errs.foreach(s => assert(!(real ~ s)))
- }
-}
--- a/re-internal.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-
-// measures the time a function needs
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-
-for (i <- 1 to 10001 by 300) {
- val re = ("((a?){" + i + "})(a{" + i + "})")
- println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re))))
-}
-
-
-
--- a/re0.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-case class REP(r: Rexp, n: Int) extends Rexp
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-implicit def RexpOps(r: Rexp) = new {
- def | (s: Rexp) = ALT(r, s)
- def % = STAR(r)
- def %(n: Int) = REP(r, n)
- def %%(n: Int) = SEQ(REP(r, n), STAR(r))
- def ? = ALT(EMPTY, r)
- def unary_! = NOT(r)
- def ~ (s: Rexp) = SEQ(r, s)
-}
-
-implicit def stringOps(s: String) = new {
- def | (r: Rexp) = ALT(s, r)
- def | (r: String) = ALT(s, r)
- def % = STAR(s)
- def %(n: Int) = REP(s, n)
- def %%(n: Int) = SEQ(REP(s, n), STAR(s))
- def ? = ALT(EMPTY, s)
- def unary_! = NOT(s)
- def ~ (r: Rexp) = SEQ(s, r)
- def ~ (r: String) = SEQ(s, r)
-}
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
- case REP(r, i) => if (i == 0) true else nullable(r)
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
- case REP(r, i) =>
- if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1))
-}
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-// main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-//examples
-val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
-val int = ("+" | "-").? ~ digit.%%(1)
-val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).?
-
-val ints = List("0", "-4534", "+049", "99")
-val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
-val errs = List("", "-", "+", "+-1", "-+2", "2-")
-
-ints.map(s => matcher(int, s))
-reals.map(s => matcher(int, s))
-errs.map(s => matcher(int, s))
-
-ints.map(s => matcher(real, s))
-reals.map(s => matcher(real, s))
-errs.map(s => matcher(real, s))
-
-
-
-def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-for (i <- 1 to 29) {
- println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
-}
-
-
--- a/re1.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
-}
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-// main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-//example
-//val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
-//der('b', r)
-//der('b', r)
-
-//one or zero
-def OPT(r: Rexp) = ALT(r, EMPTY)
-
-//n-times
-def NTIMES(r: Rexp, n: Int) : Rexp = n match {
- case 0 => EMPTY
- case 1 => r
- case n => SEQ(r, NTIMES(r, n - 1))
-}
-
-def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-for (i <- 1 to 29) {
- println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
-}
-
-
--- a/re2.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,100 +0,0 @@
-
-abstract class Rexp {
- def simp : Rexp = this
-}
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
- override def simp = (r1.simp, r2.simp) match {
- case (NULL, r) => r
- case (r, NULL) => r
- case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
- case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
- case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
- }
-}
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
- override def simp = (r1.simp, r2.simp) match {
- case (NULL, _) => NULL
- case (_, NULL) => NULL
- case (EMPTY, r) => r
- case (r, EMPTY) => r
- case (r1, r2) => SEQ(r1, r2)
- }
-}
-case class STAR(r: Rexp) extends Rexp {
- override def simp = r.simp match {
- case NULL => EMPTY
- case EMPTY => EMPTY
- case r => STAR(r)
- }
-}
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
-}
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r).simp)
-}
-
-// main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-//one or zero
-def OPT(r: Rexp) = ALT(r, EMPTY)
-
-//n-times
-def NTIMES(r: Rexp, n: Int) : Rexp = n match {
- case 0 => EMPTY
- case 1 => r
- case n => SEQ(r, NTIMES(r, n - 1))
-}
-
-def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-for (i <- 1 to 100) {
- println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
-}
-
-
--- a/re3.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-
-abstract class Rexp {
- def simp : Rexp = this
-}
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
- override def simp = (r1.simp, r2.simp) match {
- case (NULL, r) => r
- case (r, NULL) => r
- case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
- case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
- case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
- }
-}
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
- override def simp = (r1.simp, r2.simp) match {
- case (NULL, _) => NULL
- case (_, NULL) => NULL
- case (EMPTY, r) => r
- case (r, EMPTY) => r
- case (r1, r2) => SEQ(r1, r2)
- }
-}
-case class STAR(r: Rexp) extends Rexp {
- override def simp = r.simp match {
- case NULL => EMPTY
- case EMPTY => EMPTY
- case r => STAR(r)
- }
-}
-case class NTIMES(r: Rexp, n: Int) extends Rexp {
- override def simp = if (n == 0) EMPTY else
- r.simp match {
- case NULL => NULL
- case EMPTY => EMPTY
- case r => NTIMES(r, n)
- }
-}
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NTIMES(r, i) => if (i == 0) true else nullable(r)
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NTIMES(r, i) =>
- if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1))
-}
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r).simp)
-}
-
-// main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-
-//one or zero
-def OPT(r: Rexp) = ALT(r, EMPTY)
-
-def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
-
-
-for (i <- 1 to 11001 by 500) {
- println(i + " " + + " " + time_needed(1, matcher(RTEST(i), "a" * i)))
-}
-
-
--- a/regexp.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-// regular expressions
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// for example
-println(STAR("abc"))
-
-// produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),SEQ(CHAR(c),EMPTY))))
-
-
-
-// a simple-minded regular expression matcher:
-// it loops on examples like STAR(EMPTY) applied
-// to strings the regular expression does not match
-
-def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match {
- case (NULL::rs, s) => false
- case (EMPTY::rs, s) => smatchers(rs, s)
- case (CHAR(c)::rs, Nil) => false
- case (CHAR(c)::rs, d::s) => (c == d) && smatchers(rs, s)
- case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s)
- case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s)
- case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s)
- case (Nil, s) => s == Nil
-}
-
-def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList)
-
-// regular expression: a
-println(smatcher(CHAR('a'), "ab"))
-
-// regular expression: a + (b o c)
-println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab"))
-
-// regular expression: a + (b o c)
-println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc"))
-
-// loops for regular expression epsilon*
-//println(smatcher(STAR(EMPTY), "a"))
-
-
-
-// Regular expression matcher that works properly
-//================================================
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-}
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
-}
-
-// derivative w.r.t. a string (iterates der)
-def ders (s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, der(c, r))
-}
-
-// main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-//examples
-
-println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
-println(matcher(ALT(STAR("a"), STAR("b")), ""))
-println(matcher("abc", ""))
-println(matcher(STAR(ALT(EMPTY, "a")), ""))
-println(matcher(STAR(EMPTY), "a"))
-println(matcher("cab","cab"))
-println(matcher(STAR("a"),"aaa"))
-println(matcher("cab" ,"cab"))
-println(matcher(STAR("a"),"aaa"))
-
-
--- a/regexp2.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,123 +0,0 @@
-
-// regular expressions including NOT
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise more
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-
-// regular expression for specifying
-// ranges of characters
-def RANGE(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), RANGE(s))
-}
-
-//one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-
-//some regular expressions
-val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
-val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
-val LETTER = ALT(LOWERCASE, UPPERCASE)
-val DIGIT = RANGE("0123456789".toList)
-val NONZERODIGIT = RANGE("123456789".toList)
-
-val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val WHITESPACE = RANGE(" \n".toList)
-val WHITESPACES = PLUS(WHITESPACE)
-
-val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
-val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
-
-
-// an example list of regular expressions
-val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
-
-
-def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
-
-def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, t)
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), s, t ::: List(c))
- }
-
-def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
- val somes = regs.map { munch(_, s, Nil) } .flatten
- if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
-}
-
-def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
- case Nil => Nil
- case _ => one_string(regs, s) match {
- case (rest, s) => s.mkString :: tokenize(regs, rest)
- }
-}
-
-//examples
-println(tokenize(regs, "if true then then 42 else +".toList))
-println(tokenize(regs, "if+true+then+then+42+else +".toList))
-println(tokenize(regs, "ifff if 34 34".toList))
-println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
-println(tokenize(regs, "/* if true then */ then 42 else +".toList))
-//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- a/regexp3.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,141 +0,0 @@
-
-// regular expressions including NOT
-abstract class Rexp
-
-case object NULL extends Rexp
-case object EMPTY extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-
-
-// some convenience for typing in regular expressions
-def charlist2rexp(s : List[Char]) : Rexp = s match {
- case Nil => EMPTY
- case c::Nil => CHAR(c)
- case c::s => SEQ(CHAR(c), charlist2rexp(s))
-}
-implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
-
-
-// nullable function: tests whether the regular
-// expression can recognise the empty string
-def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
- case NOT(r) => !(nullable(r))
-}
-
-// tests whether a regular expression
-// cannot recognise more
-def no_more (r: Rexp) : Boolean = r match {
- case NULL => true
- case EMPTY => false
- case CHAR(_) => false
- case ALT(r1, r2) => no_more(r1) && no_more(r2)
- case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
- case STAR(_) => false
- case NOT(r) => !(no_more(r))
-}
-
-
-// derivative of a regular expression w.r.t. a character
-def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r) => SEQ(der(c, r), STAR(r))
- case NOT(r) => NOT(der (c, r))
-}
-
-// regular expression for specifying
-// ranges of characters
-def RANGE(s : List[Char]) : Rexp = s match {
- case Nil => NULL
- case c::Nil => CHAR(c)
- case c::s => ALT(CHAR(c), RANGE(s))
-}
-
-// one or more
-def PLUS(r: Rexp) = SEQ(r, STAR(r))
-
-// some regular expressions
-val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
-val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
-val LETTER = ALT(LOWERCASE, UPPERCASE)
-val DIGIT = RANGE("0123456789".toList)
-val NONZERODIGIT = RANGE("123456789".toList)
-
-val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
-val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
-val WHITESPACE = RANGE(" \n".toList)
-val WHITESPACES = PLUS(WHITESPACE)
-
-val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
-val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
-
-
-// for classifying the strings that have been recognised
-abstract class Token
-
-case object T_WHITESPACE extends Token
-case object T_COMMENT extends Token
-case class T_IDENT(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(n: Int) extends Token
-case class T_KEYWORD(s: String) extends Token
-
-
-// an example list of syntactic rules
-type Rule = (Rexp, List[Char] => Token)
-
-val rules: List[Rule]=
- List(("if", (s) => T_KEYWORD(s.mkString)),
- ("then", (s) => T_KEYWORD(s.mkString)),
- ("else", (s) => T_KEYWORD(s.mkString)),
- ("+", (s) => T_OP(s.mkString)),
- (IDENT, (s) => T_IDENT(s.mkString)),
- (NUMBER, (s) => T_NUM(s.mkString.toInt)),
- (WHITESPACES, (s) => T_WHITESPACE),
- (COMMENT, (s) => T_COMMENT))
-
-
-def error (s: String) = throw new IllegalArgumentException ("Cannot tokenize: " + s)
-
-def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] =
- s match {
- case Nil if (nullable(r)) => Some(Nil, action(t))
- case Nil => None
- case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
- case c::s if (no_more(der (c, r))) => None
- case c::s => munch(der (c, r), action, s, t ::: List(c))
- }
-
-def one_token (rs: List[Rule], s: List[Char]) : (List[Char], Token) = {
- val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten
- if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
-}
-
-def tokenize (rs: List[Rule], s: List[Char]) : List[Token] = s match {
- case Nil => Nil
- case _ => one_token(rs, s) match {
- case (rest, token) => token :: tokenize(rs, rest)
- }
-}
-
-//examples
-println(tokenize(rules, "if true then then 42 else +".toList))
-println(tokenize(rules, "if+true+then+then+42+else +".toList))
-println(tokenize(rules, "ifff if 34 34".toList))
-println(tokenize(rules, "/*ifff if */ hhjj /*34 */".toList))
-println(tokenize(rules, "/* if true then */ then 42 else +".toList))
-//println(tokenize(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/S_grammar-token.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,47 @@
+//:load matcher.scala
+//:load parser3.scala
+
+abstract class Token
+case object T_ONE extends Token
+
+val lexing_rules : List[Rule[Token]] =
+ List(("1", (s: List[Char]) => T_ONE))
+
+val T = Tokenizer(lexing_rules)
+
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tokparser(t: Token) = TokParser(t)
+
+case object EmpParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = Set(("", ts))
+}
+
+
+lazy val Su: Parser[List[Token], String] =
+ (T_ONE ~ Su) ==> { case (x, y) => "1" + y} || EmpParser
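+// e.g. Su.parse_all(T.fromString("11")) should yield Set("11")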
+
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+def test(i: Int) = {
+ val result = Su.parse_all(T.fromString("1" * i))
+ //print(result.size + " ")
+}
+
+
+for (i <- 1 to 1000 by 50) {
+ print(i + " ")
+ print("%.5f".format(time_needed(1, test(i))))
+ print("\n")
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/S_grammar.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,52 @@
+//:load parser3.scala
+
+case class StringParser(s: String) extends Parser[String, String] {
+ def parse(ts: String) = {
+ if (s.length <= ts.length && ts.startsWith(s)) Set((s, ts.drop(s.length)))
+ else Set()
+ }
+}
+
+implicit def string2parser(s: String) = StringParser(s)
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+// unambiguous grammar
+
+lazy val U: Parser[String, String] =
+ ("1" ~ U) ==> { case (x, y) => "1" + y} || ""
+
+def test1(i: Int) = {
+ val result = U.parse_all("1" * i)
+ //print(result.size + " ")
+}
+
+for (i <- 1 to 1000 by 50) {
+ print(i + " ")
+ print("%.5f".format(time_needed(1, test1(i))))
+ print("\n")
+}
+
+
+
+// ambiguous grammar
+// n = 16 -> over 35 million parse trees
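+// (the number of parse trees for n ones is the nth Catalan number;
+// Catalan(16) = 35357670)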
+
+lazy val S: Parser[String, String] =
+ ("1" ~ S ~ S) ==> { case ((x, y), z) => "1" + y + z} || ""
+
+def test2(i: Int) = {
+ val result = S.parse_all("1" * i)
+ print(result.size + " ")
+}
+
+for (i <- 1 to 30) {
+ print(i + " ")
+ print("%.5f".format(time_needed(1, test2(i))))
+ print("\n")
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/Term_grammar.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,62 @@
+//:load matcher.scala
+//:load parser3.scala
+
+// some regular expressions
+val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
+val ID = PLUS(LETTER)
+
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been lexed
+abstract class Token
+
+case object T_WHITESPACE extends Token
+case class T_NUM(s: String) extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]]=
+ List((NUMBER, (s) => T_NUM(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
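+// grammar for arithmetic expressions; the E/T/F layering gives
+// * a higher precedence than + and makes both right-associative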
+lazy val E: Parser[List[Token], Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T
+lazy val T: Parser[List[Token], Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
+lazy val F: Parser[List[Token], Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser
+
+println(E.parse_all(Tk.fromString("1 + 2 + 3")))
+println(E.parse_all(Tk.fromString("1 + 2 * 3")))
+println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
+println(E.parse_all(Tk.fromString("(14 + 2) * (3 + 2)")))
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app0.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,6 @@
+import io.Source
+
+def get_page(url: String) : String = {
+ Source.fromURL(url).take(10000).mkString
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,12 @@
+def get_page(url: String) : String = {
+ try {
+ Source.fromURL(url).take(10000).mkString
+ }
+ catch {
+ case e => {
+ println(" Problem with: " + url)
+ ""
+ }
+ }
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app2.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,16 @@
+val http_pattern = """\"https?://[^\"]*\"""".r
+
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String) : Set[String] = {
+ (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+}
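+// e.g. get_all_URLs("""see "http://foo.com" for details""") should
+// yield Set("http://foo.com") (foo.com is a made-up URL)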
+
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
+ }
+}
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app3.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,10 @@
+val my_urls = """urbanc""".r
+
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else if (my_urls.findFirstIn(url) == None) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app4.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,14 @@
+val http_pattern = """\"https?://[^\"]*\"""".r
+val my_urls = """urbanc""".r
+val email_pattern =
+ """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ val page = get_page(url)
+ println(email_pattern.findAllIn(page).mkString("\n"))
+ for (u <- get_all_URLs(page)) crawl(u, n - 1)
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app5.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,8 @@
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
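+
+// for example, nullable(STAR(CHAR('a'))) is true, while
+// nullable(SEQ(CHAR('a'), STAR(CHAR('a')))) is false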
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app51.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,8 @@
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
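+
+// for example, the regular expression (a + b)* is represented
+// by the term STAR(ALT(CHAR('a'), CHAR('b')))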
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app6.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,11 @@
+def deriv (r: Rexp, c: Char) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c))
+ else SEQ(deriv(r1, c), r2)
+ case STAR(r) => SEQ(deriv(r, c), STAR(r))
+}
+
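+// a worked example (assuming the Rexp definitions from app51.scala):
+// deriv(SEQ(CHAR('a'), CHAR('b')), 'a') yields SEQ(EMPTY, CHAR('b')),
+// and taking a further derivative w.r.t. 'b' gives a nullable result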
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app7.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,16 @@
+abstract class Parser[I, T] {
+ def parse(ts: I): Set[(T, I)]
+
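+ // succeeds only with parses that consume the entire input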
+ def parse_all(ts: I) : Set[T] =
+ for ((head, tail) <- parse(ts); if (tail.isEmpty))
+ yield head
+
+ def || (right : => Parser[I, T]) : Parser[I, T] =
+ new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [I, S] =
+ new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] =
+ new SeqParser(this, right)
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/app8.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,23 @@
+class SeqParser[I, T, S](p: => Parser[I, T],
+ q: => Parser[I, S])
+ extends Parser[I, (T, S)] {
+ def parse(sb: I) =
+ for ((head1, tail1) <- p.parse(sb);
+ (head2, tail2) <- q.parse(tail1))
+ yield ((head1, head2), tail2)
+}
+
+class AltParser[I, T](p: => Parser[I, T],
+ q: => Parser[I, T])
+ extends Parser[I, T] {
+ def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+}
+
+class FunParser[I, T, S](p: => Parser[I, T], f: T => S)
+ extends Parser[I, S] {
+ def parse(sb: I) =
+ for ((head, tail) <- p.parse(sb))
+ yield (f(head), tail)
+}
+
+
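+// a minimal usage sketch (assuming the Parser class from app7.scala
+// and a hypothetical atomic parser CharParser):
+//
+// case class CharParser(c: Char) extends Parser[String, Char] {
+//   def parse(s: String) =
+//     if (s.nonEmpty && s.head == c) Set((c, s.tail)) else Set()
+// }
+//
+// new SeqParser(CharParser('a'), CharParser('b')).parse("abc")
+// // yields Set((('a', 'b'), "c"))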
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/automata.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,106 @@
+
+// a class for deterministic finite automata,
+// the type of states is kept polymorphic
+
+case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) {
+
+ // the transition function lifted to list of characters
+ def deltas(q: A, cs: List[Char]) : Either[A, String] =
+ if (states.contains(q)) cs match {
+ case Nil => Left(q)
+ case c::cs =>
+ if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs)
+ else Right(q + " does not have a transition for " + c)
+ }
+ else Right(q + " is not a state of the automaton")
+
+ // whether a string is accepted by the automaton
+ def accepts(s: String) = deltas(start, s.toList) match {
+ case Left(q) => fins.contains(q)
+ case _ => false
+ }
+}
+
+
+// translating a regular expression into a finite
+// automaton
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+implicit def string2rexp(s : String) = {
+ def chars2rexp (cs: List[Char]) : Rexp = cs match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::cs => SEQ(CHAR(c), chars2rexp(cs))
+ }
+ chars2rexp(s.toList)
+}
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+def der (r: Rexp, c: Char) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(r1, c), der(r2, c))
+ case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c))
+ else SEQ(der(r1, c), r2)
+ case STAR(r) => SEQ(der(r, c), STAR(r))
+}
+
+
+// Here we construct an automaton whose
+// states are regular expressions
+type State = Rexp
+type States = Set[State]
+type Transition = Map[(State, Char), State]
+
+// we use as an alphabet all lowercase letters
+val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet
+
+def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = {
+ val q_der : State = der(q, c)
+ if (qs.contains(q_der)) (qs, delta + ((q, c) -> q_der))
+ else explore(qs + q_der, delta + ((q, c) -> q_der), q_der)
+}
+
+def explore (qs: States, delta: Transition, q: State) : (States, Transition) =
+ alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2))
+
+
+def mk_automaton (r: Rexp) : Automaton[Rexp] = {
+ val (qs, delta) = explore(Set(r), Map(), r);
+ val fins = for (q <- qs if nullable(q)) yield q;
+ Automaton[Rexp](r, qs, delta, fins)
+}
+
+val A = mk_automaton(ALT("ab","ac"))
+
+A.start
+A.states.toList.length
+
+println(A.accepts("bd"))
+println(A.accepts("ab"))
+println(A.accepts("ac"))
+
+val r1 = STAR(ALT("a","b"))
+val r2 = SEQ("b","b")
+val r3 = SEQ(SEQ(SEQ(r1, r2), r1), "a")
+val B = mk_automaton(r3)
+
+B.start
+B.states.toList.length
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/automata1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,95 @@
+
+// a class for deterministic finite automata,
+// the type of states is kept polymorphic
+
+case class Automaton[A](start: A, states: Set[A], delta: Map[(A, Char), A], fins: Set[A]) {
+
+ // the transition function lifted to list of characters
+ def deltas(q: A, cs: List[Char]) : A =
+ if (states.contains(q)) cs match {
+ case Nil => q
+ case c::cs =>
+ if (delta.isDefinedAt(q, c)) deltas(delta(q, c), cs)
+ else throw new RuntimeException(q + " does not have a transition for " + c)
+ }
+ else throw new RuntimeException(q + " is not a state of the automaton")
+
+ // whether a string is accepted by the automaton
+ def accepts(s: String) =
+ try {
+ fins.contains(deltas(start, s.toList))
+ } catch {
+ case e:RuntimeException => false
+ }
+}
+
+
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+implicit def string2rexp(s : String) = {
+ def chars2rexp (cs: List[Char]) : Rexp = cs match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::cs => SEQ(CHAR(c), chars2rexp(cs))
+ }
+ chars2rexp(s.toList)
+}
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+def der (r: Rexp, c: Char) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(r1, c), der(r2, c))
+ case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(r1, c), r2), der(r2, c))
+ else SEQ(der(r1, c), r2)
+ case STAR(r) => SEQ(der(r, c), STAR(r))
+}
+
+
+// Here we construct an automaton whose
+// states are regular expressions
+type State = Rexp
+type States = Set[State]
+type Transition = Map[(State, Char), State]
+
+def goto(q: State, c: Char, qs: States, delta: Transition) : (States, Transition) = {
+ val qc : State = der(q, c)
+ if (qs.contains(qc)) (qs, delta + ((q, c) -> qc))
+ else explore(qs + qc, delta + ((q, c) -> qc), qc)
+}
+
+// we use as alphabet all lowercase letters
+val alphabet = "abcdefghijklmnopqrstuvwxyz".toSet
+
+def explore (qs: States, delta: Transition, q: State) : (States, Transition) =
+ alphabet.foldRight[(States, Transition)] (qs, delta) ((c, qsd) => goto(q, c, qsd._1, qsd._2))
+
+
+def mk_automaton (r: Rexp) : Automaton[Rexp] = {
+ val (qs, delta) = explore(Set(r), Map(), r);
+ val fins = for (q <- qs if nullable(q)) yield q;
+ Automaton[Rexp](r, qs, delta, fins)
+}
+
+val A = mk_automaton(ALT("ab","ac"))
+
+println(A.accepts("bd"))
+println(A.accepts("ab"))
+println(A.accepts("ac"))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/compile.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,326 @@
+// A parser and compiler for the while language
+//
+import matcher._
+import parser._
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[(Rexp, List[Char] => Token)] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+case class Write(s: String) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (T_KWD("true") ==> ((_) => True: BExp)) ||
+ (T_KWD("false") ==> ((_) => False: BExp)) ||
+ (T_LPAREN ~> BExp <~ T_RPAREN) ||
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } ||
+ (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp }
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
+ (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+// compiler
+val beginning = """
+.class public XXX.XXX
+.super java/lang/Object
+
+.method public <init>()V
+ aload_0
+ invokenonvirtual java/lang/Object/<init>()V
+ return
+.end method
+
+.method public static write(I)V
+ .limit locals 5
+ .limit stack 5
+ iload 0
+ getstatic java/lang/System/out Ljava/io/PrintStream;
+ swap
+ invokevirtual java/io/PrintStream/println(I)V
+ return
+.end method
+
+
+.method public static main([Ljava/lang/String;)V
+ .limit locals 200
+ .limit stack 200
+
+"""
+
+val ending = """
+
+ return
+
+.end method
+"""
+
+// for generating new labels
+var counter = -1
+
+def Fresh(x: String) = {
+ counter += 1
+ x ++ "_" ++ counter.toString()
+}
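+// e.g. the first call of Fresh("If_else") returns "If_else_0",
+// the second "If_else_1", and so on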
+
+type Env = Map[String, String]
+type Instrs = List[String]
+
+def compile_aexp(a: AExp, env : Env) : Instrs = a match {
+ case Num(i) => List("ldc " + i.toString + "\n")
+ case Var(s) => List("iload " + env(s) + "\n")
+ case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
+ case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
+ case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
+}
+
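+// compiling boolean expressions: jmp is the label to jump to when
+// the expression evaluates to false (True emits no code, False
+// jumps unconditionally, relops jump on the negated comparison)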
+def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
+ case True => Nil
+ case False => List("goto " + jmp + "\n")
+ case Relop("=", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
+ case Relop("!=", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
+ case Relop("<", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
+}
+
+
+def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
+ case Skip => (Nil, env)
+ case Assign(x, a) => {
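+ // reuse the variable's JVM local slot if it exists, otherwise allocate a fresh one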
+ val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
+ (compile_aexp(a, env) ++
+ List("istore " + index + "\n"), env + (x -> index))
+ }
+ case If(b, bl1, bl2) => {
+ val if_else = Fresh("If_else")
+ val if_end = Fresh("If_end")
+ val (instrs1, env1) = compile_bl(bl1, env)
+ val (instrs2, env2) = compile_bl(bl2, env1)
+ (compile_bexp(b, env, if_else) ++
+ instrs1 ++
+ List("goto " + if_end + "\n") ++
+ List("\n" + if_else + ":\n\n") ++
+ instrs2 ++
+ List("\n" + if_end + ":\n\n"), env2)
+ }
+ case While(b, bl) => {
+ val loop_begin = Fresh("Loop_begin")
+ val loop_end = Fresh("Loop_end")
+ val (instrs1, env1) = compile_bl(bl, env)
+ (List("\n" + loop_begin + ":\n\n") ++
+ compile_bexp(b, env, loop_end) ++
+ instrs1 ++
+ List("goto " + loop_begin + "\n") ++
+ List("\n" + loop_end + ":\n\n"), env1)
+ }
+ case Write(x) =>
+ (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
+}
+
+def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
+ case Nil => (Nil, env)
+ case s::bl => {
+ val (instrs1, env1) = compile_stmt(s, env)
+ val (instrs2, env2) = compile_bl(bl, env1)
+ (instrs1 ++ instrs2, env2)
+ }
+}
+
+def compile(input: String) : String = {
+ val class_name = input.split('.')(0)
+ val tks = Tok.fromFile(input)
+ val ast = Stmts.parse_single(tks)
+ val instructions = compile_bl(ast, Map.empty)._1
+ (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+}
+
+
+def compile_to(input: String, output: String) = {
+ val fw = new java.io.FileWriter(output)
+ fw.write(compile(input))
+ fw.close()
+}
+
+//
+val tks = Tok.fromString("x := x + 1")
+val ast = Stmt.parse_single(tks)
+println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
+
+
+
+//examples
+
+compile_to("loops.while", "loops.j")
+//compile_to("fib.while", "fib.j")
+
+
+// testing cases for time measurements
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+// for testing
+import scala.sys.process._
+
+val test_prog = """
+start := XXX;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+};
+write x;
+write y;
+write z
+"""
+
+
+def compile_test(n: Int) : Unit = {
+ val class_name = "LOOP"
+ val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
+ val ast = Stmts.parse_single(tks)
+ val instructions = compile_bl(ast, Map.empty)._1
+ val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+ val fw = new java.io.FileWriter(class_name + ".j")
+ fw.write(assembly)
+ fw.close()
+ val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
+ println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
+}
+
+List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
+
+
+
+// Java bytecode assembler
+//
+// java -jar jvm/jasmin-2.4/jasmin.jar loops.j
+
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/crawler.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,144 @@
+import io.Source
+import scala.util.matching.Regex
+
+// gets the first ~10K of a page
+def get_page(url: String) : String = {
+ try {
+ Source.fromURL(url).take(10000).mkString
+ }
+ catch {
+ case e => {
+ println(" Problem with: " + url)
+ ""
+ }
+ }
+}
+
+// non-existing page -> returns the empty string
+get_page("""http://www.foobar.com""")
+
+
+// starting URL for the crawler
+val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
+
+// starts with an "
+// then either http or https
+// then ://
+// then any character that is not "
+// finally "
+val http_pattern = """\"((?:http|https)://(?:[^\"])*)\"""".r
+val http_pattern = """\"(https?://[^\"]*)\"""".r
+
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String) : Set[String] = {
+ (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+}
+
+// get all urls in startURL
+get_all_URLs(get_page(startURL))
+
+// number of all urls in startURL
+get_all_URLs(get_page(startURL)).toList.length
+
+
+// naive version - searches until a given depth
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
+ }
+}
+
+crawl(startURL, 2)
+
+
+//breadth-first version without visiting
+//pages twice
+def bf_crawl(todo: Set[String], visited: Set[String], n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ val new_todo = todo.flatMap {
+ url => {
+ if (visited.contains(url)) Set[String]()
+ else {
+ println("Visiting: " + n + " " + url)
+ get_all_URLs(get_page(url))
+ }
+ }
+ }
+ bf_crawl(new_todo, visited union todo, n - 1)
+ }
+}
+
+bf_crawl(Set(startURL), Set(), 2)
+
+
+//breadth-first version without visiting
+//pages twice and only in "my" domain
+val my_pattern = """urbanc""".r
+
+// breadth first search avoiding double searches
+def bf_crawl2(todo: Set[String], visited: Set[String], n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ val new_todo = todo.flatMap {
+ url => {
+ if (visited.contains(url)) Set[String]()
+ else if (my_pattern.findFirstIn(url) == None) Set[String]()
+ else {
+ println("Visiting: " + n + " " + url);
+ get_all_URLs(get_page(url))
+ }
+ }
+ }
+ bf_crawl2(new_todo, visited union todo, n - 1)
+ }
+}
+
+bf_crawl2(Set(startURL), Set(), 5)
+
+// email harvester
+// from
+// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/
+
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+def bf_crawl3(todo: Set[String], visited: Set[String], n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ val new_todo = todo.flatMap {
+ url => {
+ if (visited.contains(url)) Set[String]()
+ else {
+ println("Visiting: " + n + " " + url);
+ val page = get_page(url)
+ println(email_pattern.findAllIn(page).mkString("\n"))
+ get_all_URLs(get_page(url))
+ }
+ }
+ }
+ bf_crawl3(new_todo, visited union todo, n - 1)
+ }
+}
+
+bf_crawl3(Set(startURL), Set(), 3)
+
+
+// depth-first version does not work,
+// because it might visit pages at depth 1
+// while it still wants to visit them at
+// depth 2
+var visited = Set("")
+
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else if (visited.contains(url)) () //println("Already visited: " + n + " " + url)
+ else {
+ println("Visiting: " + n + " " + url);
+ visited += url
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1);
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/crawler1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,45 @@
+import io.Source
+import scala.util.matching.Regex
+
+// gets the first ~10K of a page
+def get_page(url: String) : String = {
+ try {
+ Source.fromURL(url).take(10000).mkString
+ }
+ catch {
+ case e => {
+ println(" Problem with: " + url)
+ ""
+ }
+ }
+}
+
+
+// regex for URLs
+val http_pattern = """\"https?://[^\"]*\"""".r
+
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String) : Set[String] = {
+ (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+}
+
+// naive version - searches until a given depth
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
+ }
+}
+
+// starting URL for the crawler
+val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
+//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/"""
+
+
+// call on the command line
+crawl(startURL, 2)
+
+crawl("""http://www.dcs.kcl.ac.uk/staff/urbanc/msc-projects-12.html""", 2)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/crawler2.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,44 @@
+import io.Source
+import scala.util.matching.Regex
+
+// gets the first ~10K of a page
+def get_page(url: String) : String = {
+ try {
+ Source.fromURL(url).take(10000).mkString
+ }
+ catch {
+ case e => {
+ println(" Problem with: " + url)
+ ""
+ }
+ }
+}
+
+// starting URL for the crawler
+val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
+
+// regex for URLs
+val http_pattern = """\"https?://[^\"]*\"""".r
+val my_urls = """urbanc""".r
+
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String) : Set[String] = {
+ (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+}
+
+// naive version - searches until a given depth
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ else if (my_urls.findFirstIn(url) == None) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
+ }
+}
+
+// can now deal with greater depths (here 4)
+// start on command line
+crawl(startURL, 4)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/crawler3.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,49 @@
+import io.Source
+import scala.util.matching.Regex
+
+// gets the first ~10K of a page
+def get_page(url: String) : String = {
+ try {
+ Source.fromURL(url).take(10000).mkString
+ }
+ catch {
+ case e => {
+ println(" Problem with: " + url)
+ ""
+ }
+ }
+}
+
+// starting URL for the crawler
+val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/"""
+
+// regex for URLs
+val http_pattern = """\"https?://[^\"]*\"""".r
+val my_urls = """urbanc""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/
+
+def unquote(s: String) = s.drop(1).dropRight(1)
+
+def get_all_URLs(page: String) : Set[String] = {
+ (http_pattern.findAllIn(page)).map { unquote(_) }.toSet
+}
+
+// naive version - searches until a given depth
+// visits pages potentially more than once
+def crawl(url: String, n: Int) : Unit = {
+ if (n == 0) ()
+ //else if (my_urls.findFirstIn(url) == None) ()
+ else {
+ println("Visiting: " + n + " " + url)
+ val page = get_page(url)
+ println(email_pattern.findAllIn(page).mkString("\n"))
+ for (u <- get_all_URLs(page)) crawl(u, n - 1)
+ }
+}
+
+// can now deal with depth 3
+// start on command line
+crawl(startURL, 3)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/html.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,99 @@
+
+//:load matcher.scala
+
+// some regular expressions
+val SYM = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%0123456789""")
+val WORD = PLUS(SYM)
+
+val BTAG = SEQS("<", WORD, ">")
+val ETAG = SEQS("</", WORD, ">")
+
+val WHITESPACE = PLUS(RANGE(" \n"))
+
+// for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case class T_WORD(s: String) extends Token
+case class T_ETAG(s: String) extends Token
+case class T_BTAG(s: String) extends Token
+case class T_NT(s: String, rhs: List[Token]) extends Token
+
+val lexing_rules: List[Rule[Token]] =
+ List((BTAG, (s) => T_BTAG(s.mkString)),
+ (ETAG, (s) => T_ETAG(s.mkString)),
+ (WORD, (s) => T_WORD(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE))
+
+// the tokenizer
+val T = Tokenizer(lexing_rules)
+
+// width for printing
+val WIDTH = 60
+
+
+def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
+ case Nil => println(Console.RESET)
+ case T_WHITESPACE::rest => print(Console.RESET + " "); interpret(rest, c + 1, ctr)
+ case T_WORD(s)::rest => {
+ val newstr = Console.RESET + ctr.reverse.mkString + s
+ if (c + s.length < WIDTH) {
+ print(newstr);
+ interpret(rest, c + s.length, ctr)
+ }
+ else {
+ print("\n" + newstr)
+ interpret(rest, s.length, ctr)
+ }
+ }
+ case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
+ case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
+ case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
+ case T_BTAG("<a>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
+ case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
+ case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
+ case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
+ case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
+ case _::rest => interpret(rest, c, ctr)
+}
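+
+// a minimal usage sketch (not in the original file):
+//   interpret(T.fromString("<b>hello</b> world"), 0, Nil)
+// prints "hello" in bold followed by " world"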
+
+val test_string = """
+<b>MSc Projects</b>
+
+<p>
+start of paragraph. <cyan> a <red>cyan</red> word</cyan> normal again something longer.
+</p>
+
+
+ <p><b>Description:</b>
+ <a>Regular expressions</a> are extremely useful for many text-processing tasks such as finding patterns in texts,
+ lexing programs, syntax highlighting and so on. Given that regular expressions were
+ introduced in 1950 by <a>Stephen Kleene</a>, you might think
+ regular expressions have since been studied and implemented to death. But you would definitely be mistaken: in fact they are still
+ an active research area. For example
+ <a>this paper</a>
+ about regular expression matching and partial derivatives was presented this summer at the international
+ PPDP'12 conference. The task in this project is to implement the results from this paper.</p>
+
+ <p>The background for this project is that some regular expressions are
+ <a>evil</a>
+  and can stab you in the back, according to
+  this <a>blog post</a>.
+  For example, if you use, in <a>Python</a> or
+  in <a>Ruby</a> (probably also in other mainstream programming languages), the
+  innocent-looking regular expression a?{28}a{28} and match it, say, against the string
+  <red>aaaaaaaaaa<cyan>aaaaaaa</cyan>aaaaaaaaaaa</red> (that is 28 a's), you will soon notice that your CPU usage goes to 100%. In fact,
+  Python and Ruby need approximately 30 seconds of hard work to match this string. You can try it for yourself:
+ <a>re.py</a> (Python version) and
+ <a>re.rb</a>
+ (Ruby version). You can imagine an attacker
+ mounting a nice <a>DoS attack</a> against
+ your program if it contains such an evil regular expression. Actually
+ <a>Scala</a> (and also Java) are almost immune from such
+  attacks as they can deal with strings of up to 4,300 a's in less than a second. But if you scale
+  the regular expression and string further to, say, 4,600 a's, then you get a
+ StackOverflowError
+ potentially crashing your program.
+ </p>
+"""
+
+interpret(T.fromString(test_string), 0, Nil)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/html1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,75 @@
+
+
+//:load matcher.scala
+
+
+// some regular expressions
+val LETTER = RANGE("""ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,!?-{[()]}':;%""")
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+
+val NAME = ALT(PLUS(LETTER), SEQS(PLUS(LETTER),"=", PLUS(LETTER)))
+val BTAG = SEQS("<", NAME, ">")
+val ETAG = SEQS("</", PLUS(LETTER), ">")
+
+val WORD = PLUS(ALT(LETTER, DIGIT))
+val WHITESPACE = PLUS(RANGE(" \n"))
+
+// for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case class T_WORD(s: String) extends Token
+case class T_ETAG(s: String) extends Token
+case class T_BTAG(s: String) extends Token
+case class T_NT(s: String, rhs: List[Token]) extends Token
+
+def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] =
+  Tokenizer(rs).fromString(s)
+
+
+
+// lexing rules for the HTML language
+val lexing_rules: List[Rule[Token]]=
+ List((BTAG, (s) => T_BTAG(s.mkString)),
+ (ETAG, (s) => T_ETAG(s.mkString)),
+ (WORD, (s) => T_WORD(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE))
+
+val ts = Tokenizer(lexing_rules).fromFile("test.html")
+
+
+val WIDTH = 60
+
+def is_tag(t: Token) = t match {
+ case T_BTAG(_) => true
+ case T_ETAG(_) => true
+ case _ => false
+}
+
+def interpret(ts: List[Token], c: Int, ctr: List[String]) : Unit = ts match {
+ case Nil => println(Console.RESET)
+ case T_WHITESPACE::rest => print(" "); interpret(rest, c + 1, ctr)
+ case T_WORD(s)::rest => {
+ val newc = c + s.length
+ val newstr = Console.RESET + ctr.reverse.mkString + s
+ if (newc < WIDTH) {
+ print(newstr);
+ interpret(rest, newc, ctr)
+ }
+ else {
+ print("\n" + newstr)
+ interpret(rest, s.length, ctr)
+ }
+ }
+ case T_BTAG("<p>")::rest => print("\n"); interpret(rest, 0, ctr)
+ case T_ETAG("</p>")::rest => print("\n"); interpret(rest, 0, ctr)
+ case T_BTAG("<b>")::rest => interpret(rest, c, Console.BOLD :: ctr)
+ case T_BTAG("<A>")::rest => interpret(rest, c, Console.UNDERLINED :: ctr)
+ case T_BTAG("<cyan>")::rest => interpret(rest, c, Console.CYAN :: ctr)
+ case T_BTAG("<red>")::rest => interpret(rest, c, Console.RED :: ctr)
+ case T_BTAG("<blink>")::rest => interpret(rest, c, Console.BLINK :: ctr)
+ case T_ETAG(_)::rest => interpret(rest, c, ctr.tail)
+ case _::rest => interpret(rest, c, ctr)
+}
+
+interpret(ts, 0, Nil)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/i.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,492 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case object ALLC extends Rexp // recognises any character
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp // negation of a regular expression
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case ALLC => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case ALLC => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case ALLC => EMPTY
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
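+
+// for example, der('a', SEQ(CHAR('a'), CHAR('b'))) gives SEQ(EMPTY, CHAR('b')),
+// which still recognises the remaining string "b"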
+
+// regular expression for specifying
+// ranges of characters
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+
+// one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+// many alternatives
+def Alts(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => NULL
+ case r::Nil => r
+ case r::rs => ALT(r, Alts(rs))
+}
+def ALTS(rs: Rexp*) = Alts(rs.toList)
+
+// repetitions
+def Seqs(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => NULL
+ case r::Nil => r
+ case r::rs => SEQ(r, Seqs(rs))
+}
+def SEQS(rs: Rexp*) = Seqs(rs.toList)
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+type Rule[T] = (Rexp, List[Char] => T)
+
+case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
+
+ def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, action(t))
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), action, s, t ::: List(c))
+ }
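+  // munch implements the longest-match (maximal munch) rule: it keeps
+  // taking derivatives until the regular expression cannot match any
+  // further input; e.g. (hypothetical call)
+  //   munch(PLUS(RANGE("ab")), act, "abba cd".toList, Nil)
+  // returns Some(" cd".toList, act("abba".toList))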
+
+ def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
+ val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
+ if (somes == Nil) Right(s.mkString)
+ else Left(somes sortBy (_._1.length) head)
+ }
+
+ def tokenize(cs: List[Char]) : List[T] = cs match {
+ case Nil => Nil
+ case _ => one_token(cs) match {
+ case Left((rest, token)) => token :: tokenize(rest)
+ case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
+ }
+ }
+
+ def fromString(s: String) : List[T] =
+ tokenize(s.toList).filterNot(excl.contains(_))
+
+ def fromFile(name: String) : List[T] =
+ fromString(io.Source.fromFile(name).mkString)
+
+}
+
+
+// parser combinators with input type I and return type T
+
+abstract class Parser[I <% Seq[_], T] {
+ def parse(ts: I): Set[(T, I)]
+
+ def parse_all(ts: I) : Set[T] =
+ for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
+
+ def parse_single(ts: I) : T = parse_all(ts).toList match {
+ case t::Nil => t
+ case _ => { println ("Parse Error") ; sys.exit(-1) }
+ }
+
+ def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
+ def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
+}
+
+class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
+ def parse(sb: I) =
+ for ((head1, tail1) <- p.parse(sb);
+ (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
+}
+
+class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+}
+
+class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
+ def parse(sb: I) =
+ for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+}
+
+
+// A parser and compiler for the while language
+//
+//:load matcher.scala
+//:load parser3.scala
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[Rule[Token]] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
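+
+// note: KEYWORD is listed before ID; for an input like "if" both rules
+// match the same number of characters, and the stable sort in one_token
+// then picks the rule that comes first in the list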
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+case class Write(s: String) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (T_KWD("true") ==> ((_) => True: BExp)) ||
+ (T_KWD("false") ==> ((_) => False: BExp)) ||
+ (T_LPAREN ~> BExp <~ T_RPAREN) ||
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } ||
+  (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } // x > z encoded as z < x
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
+ (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+// compiler
+val beginning = """
+.class public XXX.XXX
+.super java/lang/Object
+
+.method public <init>()V
+ aload_0
+ invokenonvirtual java/lang/Object/<init>()V
+ return
+.end method
+
+.method public static write(I)V
+ .limit locals 5
+ .limit stack 5
+ iload 0
+ getstatic java/lang/System/out Ljava/io/PrintStream;
+ swap
+ invokevirtual java/io/PrintStream/println(I)V
+ return
+.end method
+
+
+.method public static main([Ljava/lang/String;)V
+ .limit locals 200
+ .limit stack 200
+
+"""
+
+val ending = """
+
+ return
+
+.end method
+"""
+
+// for generating new labels
+var counter = -1
+
+def Fresh(x: String) = {
+ counter += 1
+ x ++ "_" ++ counter.toString()
+}
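+
+// e.g. the first call Fresh("If_else") yields "If_else_0",
+// the next one "If_else_1", and so on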
+
+type Env = Map[String, String]
+type Instrs = List[String]
+
+def compile_aexp(a: AExp, env : Env) : Instrs = a match {
+ case Num(i) => List("ldc " + i.toString + "\n")
+ case Var(s) => List("iload " + env(s) + "\n")
+ case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
+ case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
+ case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
+}
+
+def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
+ case True => Nil
+ case False => List("goto " + jmp + "\n")
+ case Relop("=", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
+ case Relop("!=", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
+ case Relop("<", a1, a2) =>
+ compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
+}
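+
+// compile_bexp emits code that falls through when the test succeeds and
+// jumps to the label jmp when it fails; e.g. (label name made up)
+//   compile_bexp(Relop("=", Var("x"), Num(0)), Map("x" -> "1"), "L_end")
+// produces: iload 1, ldc 0, if_icmpne L_end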
+
+
+def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
+ case Skip => (Nil, env)
+ case Assign(x, a) => {
+ val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
+ (compile_aexp(a, env) ++
+ List("istore " + index + "\n"), env + (x -> index))
+ }
+ case If(b, bl1, bl2) => {
+ val if_else = Fresh("If_else")
+ val if_end = Fresh("If_end")
+ val (instrs1, env1) = compile_bl(bl1, env)
+ val (instrs2, env2) = compile_bl(bl2, env1)
+ (compile_bexp(b, env, if_else) ++
+ instrs1 ++
+ List("goto " + if_end + "\n") ++
+ List("\n" + if_else + ":\n\n") ++
+ instrs2 ++
+ List("\n" + if_end + ":\n\n"), env2)
+ }
+ case While(b, bl) => {
+ val loop_begin = Fresh("Loop_begin")
+ val loop_end = Fresh("Loop_end")
+ val (instrs1, env1) = compile_bl(bl, env)
+ (List("\n" + loop_begin + ":\n\n") ++
+ compile_bexp(b, env, loop_end) ++
+ instrs1 ++
+ List("goto " + loop_begin + "\n") ++
+ List("\n" + loop_end + ":\n\n"), env1)
+ }
+ case Write(x) =>
+ (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
+}
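+
+// the While case lays out the generated code as
+//   Loop_begin:  <test, jumping to Loop_end when it fails>
+//                <body>
+//                goto Loop_begin
+//   Loop_end: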
+
+def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
+ case Nil => (Nil, env)
+ case s::bl => {
+ val (instrs1, env1) = compile_stmt(s, env)
+ val (instrs2, env2) = compile_bl(bl, env1)
+ (instrs1 ++ instrs2, env2)
+ }
+}
+
+def compile(input: String) : String = {
+ val class_name = input.split('.')(0)
+ val tks = Tok.fromFile(input)
+ val ast = Stmts.parse_single(tks)
+ val instructions = compile_bl(ast, Map.empty)._1
+ (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+}
+
+
+def compile_to(input: String, output: String) = {
+ val fw = new java.io.FileWriter(output)
+ fw.write(compile(input))
+ fw.close()
+}
+
+// a small test: compiling a single statement
+val tks = Tok.fromString("x := x + 1")
+val ast = Stmt.parse_single(tks)
+println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
+
+
+
+//examples
+
+compile_to("loops.while", "loops.j")
+//compile_to("fib.while", "fib.j")
+
+
+// testing cases for time measurements
+/*
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+// for testing
+import scala.sys.process._
+
+val test_prog = """
+start := XXX;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+};
+write x;
+write y;
+write z
+"""
+
+
+def compile_test(n: Int) : Unit = {
+ val class_name = "LOOP"
+ val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
+ val ast = Stmts.parse_single(tks)
+ val instructions = compile_bl(ast, Map.empty)._1
+ val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
+ val fw = new java.io.FileWriter(class_name + ".j")
+ fw.write(assembly)
+ fw.close()
+ val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
+ println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
+}
+
+List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
+
+
+
+// Java bytecode assembler
+//
+// java -jar jvm/jasmin-2.4/jasmin.jar loops.j
+
+*/
+
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/matcher.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,130 @@
+package object matcher {
+
+// regular expressions
+// including constructors for NOT and ALLC
+sealed abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case object ALLC extends Rexp // recognises any character
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp // negation of a regular expression
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case ALLC => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case ALLC => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case ALLC => EMPTY
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+// type abbreviation for lexing rules: a regular expression
+// paired with a token-producing action
+type Rule[T] = (Rexp, List[Char] => T)
+
+// main class for the tokenizer
+case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
+
+def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, action(t))
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), action, s, t ::: List(c))
+ }
+
+def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
+ val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
+ if (somes == Nil) Right(s.mkString)
+ else Left(somes sortBy (_._1.length) head)
+}
+
+def tokenize(cs: List[Char]) : List[T] = cs match {
+ case Nil => Nil
+ case _ => one_token(cs) match {
+ case Left((rest, token)) => token :: tokenize(rest)
+ case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
+ }
+}
+
+def fromString(s: String) : List[T] =
+ tokenize(s.toList).filterNot(excl.contains(_))
+
+def fromFile(name: String) : List[T] =
+ fromString(io.Source.fromFile(name).mkString)
+
+}
+
+
+// regular expression for specifying
+// ranges of characters
+def Range(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), Range(s))
+}
+def RANGE(s: String) = Range(s.toList)
+
+
+// one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+// many alternatives
+def Alts(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => NULL
+ case r::Nil => r
+ case r::rs => ALT(r, Alts(rs))
+}
+def ALTS(rs: Rexp*) = Alts(rs.toList)
+
+// repetitions
+def Seqs(rs: List[Rexp]) : Rexp = rs match {
+ case Nil => NULL
+ case r::Nil => r
+ case r::rs => SEQ(r, Seqs(rs))
+}
+def SEQS(rs: Rexp*) = Seqs(rs.toList)
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/mllex.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,109 @@
+//:load matcher.scala
+
+
+// some regular expressions
+// note: the keyword strings are converted explicitly to Rexps, since the
+// implicit string2rexp conversion does not lift List[String] to List[Rexp]
+val KEYWORDS = Alts(List("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
+  "=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
+  "case", "datatype", "do", "else", "end", "eqtype", "exception", "fn",
+  "fun", "functor", "handle", "if", "in", "include", "infix", "infixr",
+  "let", "local", "nonfix", "of", "op", "open", "orelse", "raise", "rec",
+  "sharing", "sig", "signature", "struct", "structure", "then", "type",
+  "val", "where", "while", "with", "withtype").map(string2rexp))
+
+val DIGITS = RANGE("0123456789")
+val NONZERODIGITS = RANGE("123456789")
+
+val POSITIVES = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
+val INTEGERS = ALT(SEQ("~", POSITIVES), POSITIVES)
+
+val ALL = ALTS(KEYWORDS, INTEGERS)
+
+val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALL), "*/", STAR(ALL))), "*/")
+
+
+
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_NUM extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case class T_NT(s: String, rhs: List[Token]) extends Token
+
+def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] =
+  Tokenizer(rs, List(T_WHITESPACE)).fromString(s)
+
+
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]] =
+  List((POSITIVES, (s) => T_NUM), // POSITIVES plays the role of NUMBER in the other files
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+Tokenizer(lexing_rules, List(T_WHITESPACE)).fromFile("nominal_library.ML")
+
+
+
+
+type Grammar = List[(String, List[Token])]
+
+// grammar for arithmetic expressions
+val grammar =
+ List ("E" -> List(T_NUM),
+ "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)),
+ "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)),
+ "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)),
+ "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN))
+
+def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match {
+ case (_, Nil) => true
+ case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2)
+ case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2)
+ case _ => false
+}
+
+def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
+ ts1 match {
+ case Nil => None
+ case t::ts =>
+ if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
+ else chop(ts, prefix, t::ts2)
+ }
+
+// examples
+chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
+chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
+
+def replace[A](ts: List[A], out: List[A], in: List [A]) =
+ chop(ts, out, Nil) match {
+ case None => None
+ case Some((before, after)) => Some(before ::: in ::: after)
+ }
+
+def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match {
+ case List(T_NT("E", tree)) => { println(tree); true }
+ case _ => {
+ val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs)))
+ tss.flatten.exists(parse1(g, _))
+ }
+}
+
+
+println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1"))
+println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)"))
+println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)"))
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/parser1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,88 @@
+// A naive bottom-up parser with backtracking
+//
+// Needs:
+// :load matcher.scala
+
+// some regular expressions
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_NUM extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case class NT(s: String) extends Token
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]]=
+ List((NUMBER, (s) => T_NUM),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+type Grammar = List[(String, List[Token])]
+
+// grammar for arithmetic expressions
+val grammar =
+ List ("F" -> List(T_NUM),
+ "E" -> List(T_NUM),
+ "E" -> List(NT("E"), T_OP("+"), NT("E")),
+ "E" -> List(NT("E"), T_OP("-"), NT("E")),
+ "E" -> List(NT("E"), T_OP("*"), NT("E")),
+ "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
+
+
+def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
+ ts1 match {
+ case Nil => None
+ case t::ts =>
+ if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
+ else chop(ts, prefix, t::ts2)
+ }
+
+// examples for chop
+chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
+chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
+
+def replace[A](ts: List[A], out: List[A], in: List [A]) =
+ chop(ts, out, Nil) match {
+ case None => None
+ case Some((before, after)) => Some(before ::: in ::: after)
+ }
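+
+// example: replace(List(1,2,3,4,5), List(3,4), List(9))
+// gives Some(List(1, 2, 9, 5))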
+
+def parse(g: Grammar, ts: List[Token]) : Boolean = {
+ println(ts)
+ if (ts == List(NT("E"))) true
+ else {
+ val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
+ tss.flatten.exists(parse(g, _))
+ }
+}
+
+def parser(g: Grammar, s: String) = {
+ println("\n")
+ parse(g, Tok.fromString(s))
+}
+
+
+
+parser(grammar, "2 + 3 * 4 + 1")
+parser(grammar, "(2 + 3) * (4 + 1)")
+parser(grammar, "(2 + 3) * 4 (4 + 1)")
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/parser2.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,139 @@
+// A naive version of parser combinators producing parse trees
+//
+// Needs
+// :load matcher.scala
+
+// some regular expressions
+val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
+val ID = PLUS(LETTER)
+
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+abstract class Token
+
+case object T_WHITESPACE extends Token
+case class T_NUM(s: String) extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_IF extends Token
+case object T_THEN extends Token
+case object T_ELSE extends Token
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]]=
+ List(("if", (s) => T_IF),
+ ("then", (s) => T_THEN),
+ ("else", (s) => T_ELSE),
+ (NUMBER, (s) => T_NUM(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+
+// parse trees
+abstract class ParseTree
+case class Leaf(t: Token) extends ParseTree
+case class Branch(pts: List[ParseTree]) extends ParseTree
+
+def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match {
+ case Leaf(t) => Branch(List(Leaf(t), pt2))
+ case Branch(pts) => Branch(pts ++ List(pt2))
+}
+
+// parser combinators
+abstract class Parser {
+ def parse(ts: List[Token]): Set[(ParseTree, List[Token])]
+
+ def parse_all(ts: List[Token]) : Set[ParseTree] =
+ for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
+
+ def || (right : => Parser) : Parser = new AltParser(this, right)
+ def ~ (right : => Parser) : Parser = new SeqParser(this, right)
+}
+
+class AltParser(p: => Parser, q: => Parser) extends Parser {
+ def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts)
+}
+
+class SeqParser(p: => Parser, q: => Parser) extends Parser {
+ def parse(ts: List[Token]) =
+ for ((head1, tail1) <- p.parse(ts);
+ (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2)
+}
+
+class ListParser(ps: => List[Parser]) extends Parser {
+ def parse(ts: List[Token]) = ps match {
+ case Nil => Set()
+ case p::Nil => p.parse(ts)
+ case p::ps =>
+ for ((head1, tail1) <- p.parse(ts);
+ (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2)
+ }
+}
+
+case class TokParser(tok: Token) extends Parser {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((Leaf(t), ts))
+ case _ => Set ()
+ }
+}
+
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object IdParser extends Parser {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts))
+ case _ => Set ()
+ }
+}
+
+case object NumParser extends Parser {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts))
+ case _ => Set ()
+ }
+}
+
+lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol
+lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
+lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
+
+println(Tok.fromString("1 + 2 + 3"))
+println(E.parse_all(Tok.fromString("1 + 2 + 3")))
+
+def eval(t: ParseTree) : Int = t match {
+ case Leaf(T_NUM(n)) => n.toInt
+ case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
+ case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
+ case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)
+}
+
+(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
+(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
+
+lazy val EXPR: Parser =
+ new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||
+ new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||
+ IdParser
+
+println(EXPR.parse_all(Tok.fromString("if a then b else c")))
+println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))
+
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/parser2a.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,105 @@
+// Parser combinators including semantic actions
+// parses lists of tokens
+//
+// Needs
+// :load matcher.scala
+
+// some regular expressions
+val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
+val ID = PLUS(LETTER)
+
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+abstract class Token
+
+case object T_WHITESPACE extends Token
+case class T_NUM(s: String) extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_IF extends Token
+case object T_THEN extends Token
+case object T_ELSE extends Token
+
+// lexing rules for arithmetic expressions
+val lexing_rules: List[Rule[Token]]=
+ List(("if", (s) => T_IF),
+ ("then", (s) => T_THEN),
+ ("else", (s) => T_ELSE),
+ (NUMBER, (s) => T_NUM(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+// parser combinators with return type T
+abstract class Parser[T] {
+ def parse(ts: List[Token]): Set[(T, List[Token])]
+
+ def parse_all(ts: List[Token]) : Set[T] =
+ for ((head, tail) <- parse(ts); if (tail == Nil)) yield head
+
+ def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2)
+ def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1)
+
+}
+
+class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
+ def parse(sb: List[Token]) =
+ for ((head1, tail1) <- p.parse(sb);
+ (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
+}
+
+class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
+ def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb)
+}
+
+class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
+ def parse (sb: List[Token]) =
+ for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+}
+
+
+case class TokParser(tok: Token) extends Parser[Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T
+lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
+lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser
+
+println(E.parse_all(Tok.fromString("1 + 2 + 3")))
+println(E.parse_all(Tok.fromString("1 + 2 * 3")))
+println(E.parse_all(Tok.fromString("(1 + 2) * 3")))
+
+// Exercise: implement minus
+println(E.parse_all(Tok.fromString("(1 - 2) * 3")))
+println(E.parse_all(Tok.fromString("(1 + 2) * - 3")))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/parser3.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,41 @@
+package object parser {
+
+// parser combinators
+// with input type I and return type T
+//
+// needs to be compiled with scalac parser3.scala
+
+abstract class Parser[I <% Seq[_], T] {
+ def parse(ts: I): Set[(T, I)]
+
+ def parse_all(ts: I) : Set[T] =
+ for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
+
+ def parse_single(ts: I) : T = parse_all(ts).toList match {
+ case t::Nil => t
+ case _ => { println ("Parse Error") ; sys.exit(-1) }
+ }
+
+ def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
+ def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
+}
+
+class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
+ def parse(sb: I) =
+ for ((head1, tail1) <- p.parse(sb);
+ (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
+}
+
+class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+}
+
+class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
+ def parse(sb: I) =
+ for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+}
+
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/parser4.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,82 @@
+
+// parser combinators with input type I and return type T
+
+case class SubString(s: String, l: Int, h: Int) {
+ def low = l
+ def high = h
+ def length = h - l
+ def substring(l: Int = l, h: Int = h) = s.slice(l, h)
+ def set(low: Int = l, high: Int = h) = SubString(s, low, high)
+
+}
+
+type Ctxt = List[(String, SubString)]
+
+abstract class Parser[T] {
+
+ def parse(ts: SubString, ctxt: Ctxt): Set[(T, SubString)]
+
+ def parse_all(s: String) : Set[T] =
+ for ((head, tail) <- parse(SubString(s, 0, s.length), Nil); if (tail.substring() == "")) yield head
+
+ def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)
+}
+
+class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
+ def parse(sb: SubString, ctxt: Ctxt) =
+ for ((head1, tail1) <- p.parse(sb, ctxt);
+ (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
+}
+
+class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
+ def parse(sb: SubString, ctxt: Ctxt) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
+}
+
+class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
+ def parse(sb: SubString, ctxt: Ctxt) =
+ for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
+}
+
+case class SubStringParser(s: String) extends Parser[SubString] {
+ val n = s.length
+ def parse(sb: SubString, ctxt: Ctxt) = {
+ if (n <= sb.length && sb.substring(sb.low, sb.low + n) == s)
+ Set((sb.set(high = sb.low + n), sb.set(low = sb.low + n)))
+ else Set()
+ }
+}
+
+implicit def string2parser(s: String) = SubStringParser(s) ==> (_.substring())
+
+class IgnLst[T](p: => Parser[T]) extends Parser[T] {
+ def parse(sb: SubString, ctxt: Ctxt) = {
+ if (sb.length == 0) Set()
+ else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
+ yield (head, tail.set(high = tail.high + 1))
+ }
+}
+
+class CHECK[T](nt: String, p: => Parser[T]) extends Parser[T] {
+ def parse(sb: SubString, ctxt: Ctxt) = {
+ val should_trim = ctxt.contains (nt, sb)
+ if (should_trim && sb.length == 0) Set()
+ else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
+ else p.parse(sb, (nt, sb)::ctxt)
+ }
+}
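+
+// CHECK guards the left-recursive grammar below against non-termination:
+// if the same nonterminal is tried again on the very same substring, the
+// IgnLst wrapper forces the last character to be consumed elsewhere, so
+// each recursive call works on a strictly shorter input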
+
+// ambiguous grammar
+lazy val E: Parser[Int] =
+ new CHECK("E", (E ~ "+" ~ E) ==> { case ((x, y), z) => x + z} ||
+ (E ~ "*" ~ E) ==> { case ((x, y), z) => x * z} ||
+ ("(" ~ E ~ ")") ==> { case ((x, y), z) => y} ||
+ "0" ==> { (s) => 0 } ||
+ "1" ==> { (s) => 1 } ||
+ "2" ==> { (s) => 2 } ||
+ "3" ==> { (s) => 3 })
+
+println(E.parse_all("1+2*3+3"))
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/parser5.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,113 @@
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
+
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val LPAREN = CHAR('(')
+val RPAREN = CHAR(')')
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
+
+// for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case class T_NUM(s: String) extends Token
+case class T_OP(s: String) extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+
+val lexing_rules: List[Rule[Token]]=
+ List((NUMBER, (s) => T_NUM(s.mkString)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (OPS, (s) => T_OP(s.mkString)))
+
+val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+
+// parser combinators with input type I and return type T
+// and memoisation
+
+case class SubList[T](s: List[T], l: Int, h: Int) {
+ def low = l
+ def high = h
+ def length = h - l
+ def sublist(l: Int = l, h: Int = h) = s.slice(l, h)
+ def set(low: Int = l, high: Int = h) = SubList(s, low, high)
+}
+
+type Ctxt[T] = List[(String, SubList[T])]
+
+abstract class Parser[I, T] {
+
+ def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])]
+
+ def parse_all(s: List[I]) : Set[T] =
+ for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head
+
+ def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
+ def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
+ def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
+ def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
+ def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
+}
+
+class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) =
+ for ((head1, tail1) <- p.parse(sb, ctxt);
+ (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2)
+}
+
+class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt)
+}
+
+class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) =
+ for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail)
+}
+
+case object NumParser extends Parser[Token, Int] {
+ def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
+ if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match {
+ case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1)))
+ case _ => Set()
+ }
+ else Set()
+ }
+}
+
+case class TokParser(t: Token) extends Parser[Token, Token] {
+ def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
+ if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1)))
+ else Set()
+ }
+}
+
+implicit def token2tparser(t: Token) = TokParser(t)
+
+class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
+ if (sb.length == 0) Set()
+ else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt))
+ yield (head, tail.set(high = tail.high + 1))
+ }
+}
+
+class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] {
+ def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
+ val should_trim = ctxt.contains (nt, sb)
+ if (should_trim && sb.length == 0) Set()
+ else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt)
+ else p.parse(sb, (nt, sb)::ctxt)
+ }
+}
+
+lazy val E: Parser[Token, Int] =
+ new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} ||
+ (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} ||
+ (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} ||
+ NumParser)
+
+println(E.parse_all(Tk.fromString("1 + 2 * 3")))
+println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re-alt.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,115 @@
+trait RegExp {
+ def nullable: Boolean
+ def derive(c: Char): RegExp
+}
+
+case object Empty extends RegExp {
+ def nullable = false
+ def derive(c: Char) = Empty
+}
+
+case object Eps extends RegExp {
+ def nullable = true
+ def derive(c: Char) = Empty
+}
+
+case class Str(s: String) extends RegExp {
+ def nullable = s.isEmpty
+ def derive(c: Char) =
+ if (s.isEmpty || s.head != c) Empty
+ else Str(s.tail)
+}
+
+case class Cat(r: RegExp, s: RegExp) extends RegExp {
+ def nullable = r.nullable && s.nullable
+ def derive(c: Char) =
+ if (r.nullable) Or(Cat(r.derive(c), s), s.derive(c))
+ else Cat(r.derive(c), s)
+}
+
+case class Star(r: RegExp) extends RegExp {
+ def nullable = true
+ def derive(c: Char) = Cat(r.derive(c), this)
+}
+
+case class Or(r: RegExp, s: RegExp) extends RegExp {
+ def nullable = r.nullable || s.nullable
+ def derive(c: Char) = Or(r.derive(c), s.derive(c))
+}
+
+case class And(r: RegExp, s: RegExp) extends RegExp {
+ def nullable = r.nullable && s.nullable
+ def derive(c: Char) = And(r.derive(c), s.derive(c))
+}
+
+case class Not(r: RegExp) extends RegExp {
+ def nullable = !r.nullable
+ def derive(c: Char) = Not(r.derive(c))
+}
+
+
+
+
+object Matcher {
+ def matches(r: RegExp, s: String): Boolean = {
+ if (s.isEmpty) r.nullable
+ else matches(r.derive(s.head), s.tail)
+ }
+}
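+
+// for example, Matcher.matches(Str("ab"), "ab") evaluates to true,
+// while Matcher.matches(Str("ab"), "abc") evaluates to false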
+
+
+object Pimps {
+ implicit def string2RegExp(s: String) = Str(s)
+
+ implicit def regExpOps(r: RegExp) = new {
+ def | (s: RegExp) = Or(r, s)
+ def & (s: RegExp) = And(r, s)
+ def % = Star(r)
+ def %(n: Int) = rep(r, n)
+ def ? = Or(Eps, r)
+ def ! = Not(r)
+ def ++ (s: RegExp) = Cat(r, s)
+ def ~ (s: String) = Matcher.matches(r, s)
+ }
+
+ implicit def stringOps(s: String) = new {
+ def | (r: RegExp) = Or(s, r)
+ def | (r: String) = Or(s, r)
+ def & (r: RegExp) = And(s, r)
+ def & (r: String) = And(s, r)
+ def % = Star(s)
+ def % (n: Int) = rep(Str(s), n)
+ def ? = Or(Eps, s)
+ def ! = Not(s)
+ def ++ (r: RegExp) = Cat(s, r)
+ def ++ (r: String) = Cat(s, r)
+ def ~ (t: String) = Matcher.matches(s, t)
+ }
+
+ def rep(r: RegExp, n: Int): RegExp =
+ if (n <= 0) Star(r)
+ else Cat(r, rep(r, n - 1))
+}
+
+
+object Test {
+ import Pimps._
+
+ val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ val int = ("+" | "-").? ++ digit.%(1)
+ val real = ("+" | "-").? ++ digit.%(1) ++ ("." ++ digit.%(1)).? ++ (("e" | "E") ++ ("+" | "-").? ++ digit.%(1)).?
+
+ def main(args: Array[String]) {
+ val ints = List("0", "-4534", "+049", "99")
+ val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
+ val errs = List("", "-", "+", "+-1", "-+2", "2-")
+
+ ints.foreach(s => assert(int ~ s))
+ reals.foreach(s => assert(!(int ~ s)))
+ errs.foreach(s => assert(!(int ~ s)))
+
+ ints.foreach(s => assert(real ~ s))
+ reals.foreach(s => assert(real ~ s))
+ errs.foreach(s => assert(!(real ~ s)))
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re-internal.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,17 @@
+
+// measures the time a function needs
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+for (i <- 1 to 10001 by 300) {
+ val re = ("((a?){" + i + "})(a{" + i + "})")
+ println(i + " " + "%.5f".format(time_needed(1, ("a" * i).matches(re))))
+}
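+
+// the pattern ((a?){n})(a{n}) matched against "a" * n is the classic
+// evil regular expression: Java's backtracking matcher (behind .matches)
+// needs exponential time on it, so the timings above blow up quickly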
+
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,123 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+//one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+
+//some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGIT = RANGE("0123456789".toList)
+val NONZERODIGIT = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+val WHITESPACES = PLUS(WHITESPACE)
+
+val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
+
+
+// an example list of regular expressions
+val regs: List[Rexp]= List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
+
+
+def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
+
+def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, t)
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), s, t ::: List(c))
+ }
+
+def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
+ val somes = regs.map { munch(_, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
+
+def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
+ case Nil => Nil
+ case _ => one_string(regs, s) match {
+ case (rest, s) => s.mkString :: tokenize(regs, rest)
+ }
+}
+
+//examples
+println(tokenize(regs, "if true then then 42 else +".toList))
+println(tokenize(regs, "if+true+then+then+42+else +".toList))
+println(tokenize(regs, "ifff if 34 34".toList))
+println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
+println(tokenize(regs, "/* if true then */ then 42 else +".toList))
+//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re0.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,117 @@
+import scala.annotation.tailrec
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case object ALLCHAR extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class STR(s: String) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+case class REP(r: Rexp, n: Int) extends Rexp
+
+// some convenience for typing in regular expressions
+implicit def string2rexp(s : String) : Rexp = STR(s)
+
+implicit def RexpOps(r: Rexp) = new {
+ def | (s: Rexp) = ALT(r, s)
+ def % = STAR(r)
+ def %(n: Int) = REP(r, n)
+ def %%(n: Int) = SEQ(REP(r, n), STAR(r))
+ def ? = ALT(EMPTY, r)
+ def unary_! = NOT(r)
+ def ~ (s: Rexp) = SEQ(r, s)
+}
+
+implicit def stringOps(s: String) = new {
+ def | (r: Rexp) = ALT(s, r)
+ def | (r: String) = ALT(s, r)
+ def % = STAR(s)
+ def %(n: Int) = REP(s, n)
+ def %%(n: Int) = SEQ(REP(s, n), STAR(s))
+ def ? = ALT(EMPTY, s)
+ def unary_! = NOT(s)
+ def ~ (r: Rexp) = SEQ(s, r)
+ def ~ (r: String) = SEQ(s, r)
+}
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case ALLCHAR => false
+ case CHAR(_) => false
+ case STR(s) => s.isEmpty
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+ case REP(r, i) => if (i == 0) true else nullable(r)
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case ALLCHAR => EMPTY
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case STR(s) => if (s.isEmpty || s.head != c) NULL else STR(s.tail)
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+ case REP(r, i) =>
+ if (i == 0) NULL else SEQ(der(c, r), REP(r, i - 1))
+}
+
+// derivative w.r.t. a string (iterates der)
+@tailrec
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
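+
+// a small worked trace (illustration only): matching the string "ab"
+// against SEQ(CHAR('a'), CHAR('b')) by taking successive derivatives
+//
+//   der('a', SEQ(CHAR('a'), CHAR('b')))  ~>  SEQ(EMPTY, CHAR('b'))
+//   der('b', SEQ(EMPTY, CHAR('b')))      ~>  ALT(SEQ(NULL, CHAR('b')), EMPTY)
+//
+// the result is nullable, therefore the match succeeds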
+
+//examples
+val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+val int = ("+" | "-").? ~ digit.%%(1)
+val real = ("+" | "-").? ~ digit.%%(1) ~ ("." ~ digit.%%(1)).? ~ (("e" | "E") ~ ("+" | "-").? ~ digit.%%(1)).?
+
+val ints = List("0", "-4534", "+049", "99")
+val reals = List("0.9", "-12.8", "+91.0", "9e12", "+9.21E-12", "-512E+01")
+val errs = List("", "-", "+", "+-1", "-+2", "2-")
+
+ints.map(s => matcher(int, s))
+reals.map(s => matcher(int, s))
+errs.map(s => matcher(int, s))
+
+ints.map(s => matcher(real, s))
+reals.map(s => matcher(real, s))
+errs.map(s => matcher(real, s))
+
+
+
+// the classic stress test: (a?){n} a{n} matched against the string "a" * n
+def RTEST(n: Int) = ("a".? %(n)) ~ ("a" %(n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+for (i <- 1 to 12000 by 500) {
+ println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,80 @@
+
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+//example
+//val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
+//der('b', r)
+//der('b', r)
+
+//one or zero
+def OPT(r: Rexp) = ALT(r, EMPTY)
+
+//n-times
+def NTIMES(r: Rexp, n: Int) : Rexp = n match {
+ case 0 => EMPTY
+ case 1 => r
+ case n => SEQ(r, NTIMES(r, n - 1))
+}
+
+def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+for (i <- 1 to 29) {
+ println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re2.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,100 @@
+
+abstract class Rexp {
+ def simp : Rexp = this
+}
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
+ override def simp = (r1.simp, r2.simp) match {
+ case (NULL, r) => r
+ case (r, NULL) => r
+ case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
+ case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
+ case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
+ }
+}
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
+ override def simp = (r1.simp, r2.simp) match {
+ case (NULL, _) => NULL
+ case (_, NULL) => NULL
+ case (EMPTY, r) => r
+ case (r, EMPTY) => r
+ case (r1, r2) => SEQ(r1, r2)
+ }
+}
+case class STAR(r: Rexp) extends Rexp {
+ override def simp = r.simp match {
+ case NULL => EMPTY
+ case EMPTY => EMPTY
+ case r => STAR(r)
+ }
+}
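+
+// for example, simplification removes the junk that derivatives
+// accumulate (illustration only):
+//
+//   ALT(SEQ(NULL, STAR(CHAR('a'))), STAR(CHAR('a'))).simp  ~>  STAR(CHAR('a'))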
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r).simp)
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+//one or zero
+def OPT(r: Rexp) = ALT(r, EMPTY)
+
+//n-times
+def NTIMES(r: Rexp, n: Int) : Rexp = n match {
+ case 0 => EMPTY
+ case 1 => r
+ case n => SEQ(r, NTIMES(r, n - 1))
+}
+
+def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+for (i <- 1 to 100) {
+ println(i + ": " + "%.5f".format(time_needed(1, matcher(RTEST(i), "a" * i))))
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re3.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,106 @@
+
+abstract class Rexp {
+ def simp : Rexp = this
+}
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
+ override def simp = (r1.simp, r2.simp) match {
+ case (NULL, r) => r
+ case (r, NULL) => r
+ case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
+ case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
+ case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
+ }
+}
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
+ override def simp = (r1.simp, r2.simp) match {
+ case (NULL, _) => NULL
+ case (_, NULL) => NULL
+ case (EMPTY, r) => r
+ case (r, EMPTY) => r
+ case (r1, r2) => SEQ(r1, r2)
+ }
+}
+case class STAR(r: Rexp) extends Rexp {
+ override def simp = r.simp match {
+ case NULL => EMPTY
+ case EMPTY => EMPTY
+ case r => STAR(r)
+ }
+}
+case class NTIMES(r: Rexp, n: Int) extends Rexp {
+ override def simp = if (n == 0) EMPTY else
+ r.simp match {
+ case NULL => NULL
+ case EMPTY => EMPTY
+ case r => NTIMES(r, n)
+ }
+}
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NTIMES(r, i) =>
+ if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r).simp)
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+
+//one or zero
+def OPT(r: Rexp) = ALT(r, EMPTY)
+
+def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+for (i <- 1 to 11001 by 500) {
+ println(i + " " + time_needed(1, matcher(RTEST(i), "a" * i)))
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/re4.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,101 @@
+import scala.annotation.tailrec
+abstract class Rexp {
+ def simp : Rexp = this
+}
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
+ override def simp = (r1.simp, r2.simp) match {
+ case (NULL, r) => r
+ case (r, NULL) => r
+ case (r, EMPTY) => if (nullable(r)) r else ALT(r, EMPTY)
+ case (EMPTY, r) => if (nullable(r)) r else ALT(r, EMPTY)
+ case (r1, r2) => ALT(r1, r2)
+ }
+}
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
+ override def simp = (r1.simp, r2.simp) match {
+ case (NULL, _) => NULL
+ case (_, NULL) => NULL
+ case (EMPTY, r) => r
+ case (r, EMPTY) => r
+ case (r1, r2) => SEQ(r1, r2)
+ }
+}
+case class STAR(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NTIMES(r, i) =>
+ if (i == 0) NULL else SEQ(der(c, r), NTIMES(r, i - 1))
+}
+
+// derivative w.r.t. a string (iterates der)
+@tailrec
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r).simp)
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+
+//one or zero
+def OPT(r: Rexp) = ALT(r, EMPTY)
+
+//n-times
+/*def NTIMES(r: Rexp, n: Int) : Rexp = n match {
+ case 0 => EMPTY
+ case 1 => r
+ case n => SEQ(r, NTIMES(r, n - 1))
+}*/
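+
+// note: in this version NTIMES is a proper constructor which der unfolds
+// only one step per character (NTIMES(r, i) ~> SEQ(der(c, r), NTIMES(r, i - 1))),
+// so the starting regular expression stays small instead of being the
+// n-fold expansion the commented-out function above would build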
+
+def RTEST(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+for (i <- 1 to 13001 by 500) {
+ println(i + " " + time_needed(1, matcher(RTEST(i), "a" * i)))
+}
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/regexp.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,106 @@
+// regular expressions
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// for example
+println(STAR("abc"))
+
+// produces STAR(SEQ(CHAR(a),SEQ(CHAR(b),CHAR(c))))
+
+
+
+// a simple-minded regular expression matcher:
+// it loops for examples like STAR(EMPTY) with
+// strings this regular expression does not match
+
+def smatchers(rs: List[Rexp], s: List[Char]) : Boolean = (rs, s) match {
+ case (NULL::rs, s) => false
+ case (EMPTY::rs, s) => smatchers(rs, s)
+ case (CHAR(c)::rs, Nil) => false
+ case (CHAR(c)::rs, d::s) => (c == d) && smatchers(rs, s)
+ case (ALT(r1, r2)::rs, s) => smatchers(r1::rs, s) || smatchers(r2::rs, s)
+ case (SEQ(r1, r2)::rs, s) => smatchers(r1::r2::rs, s)
+ case (STAR(r)::rs, s) => smatchers(rs, s) || smatchers(r::STAR(r)::rs, s)
+ case (Nil, s) => s == Nil
+}
+
+def smatcher(r: Rexp, s: String) = smatchers(List(r), s.toList)
+
+// regular expression: a
+println(smatcher(CHAR('a'), "ab"))
+
+// regular expression: a + (b o c)
+println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "ab"))
+
+// regular expression: a + (b o c)
+println(smatcher(ALT(CHAR('a'), SEQ(CHAR('b'), CHAR('c'))), "bc"))
+
+// loops for regular expression epsilon*
+//println(smatcher(STAR(EMPTY), "a"))
+
+
+
+// Regular expression matcher that works properly
+//================================================
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+//examples
+
+println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
+println(matcher(ALT(STAR("a"), STAR("b")), ""))
+println(matcher("abc", ""))
+println(matcher(STAR(ALT(EMPTY, "a")), ""))
+println(matcher(STAR(EMPTY), "a"))
+println(matcher("cab","cab"))
+println(matcher(STAR("a"),"aaa"))
+println(matcher("cab" ,"cab"))
+println(matcher(STAR("a"),"aaa"))
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/regexp2.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,123 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+//one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+
+//some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGIT = RANGE("0123456789".toList)
+val NONZERODIGIT = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+val WHITESPACES = PLUS(WHITESPACE)
+
+val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
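+
+// note: the NOT inside COMMENT excludes bodies of the form ALL* "*/" ALL*,
+// i.e. bodies that already contain a closing "*/" — so, over the ALL
+// characters, a comment ends at its first "*/"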
+
+
+// an example list of regular expressions
+val regs: List[Rexp] = List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACES, COMMENT)
+
+
+def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
+
+def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, t)
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, t)
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), s, t ::: List(c))
+ }
+
+def one_string (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
+ val somes = regs.map { munch(_, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
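+
+// a small sketch of the maximal munch behaviour (illustration only):
+//
+//   munch(IDENT, "if34 +".toList, Nil) ~> Some((" +".toList, "if34".toList))
+//
+// one_string sorts the candidate munches by the length of the leftover
+// input and takes the head, i.e. it picks the longest munch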
+
+def tokenize (regs: List[Rexp], s: List[Char]) : List[String] = s match {
+ case Nil => Nil
+ case _ => one_string(regs, s) match {
+ case (rest, s) => s.mkString :: tokenize(regs, rest)
+ }
+}
+
+//examples
+println(tokenize(regs, "if true then then 42 else +".toList))
+println(tokenize(regs, "if+true+then+then+42+else +".toList))
+println(tokenize(regs, "ifff if 34 34".toList))
+println(tokenize(regs, "/*ifff if */ hhjj /*34 */".toList))
+println(tokenize(regs, "/* if true then */ then 42 else +".toList))
+//println(tokenize(regs, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/regexp3.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,141 @@
+
+// regular expressions including NOT
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// cannot recognise more
+def no_more (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => no_more(r1) && no_more(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
+ case STAR(_) => false
+ case NOT(r) => !(no_more(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+// one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+// some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGIT = RANGE("0123456789".toList)
+val NONZERODIGIT = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGIT)))
+val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+val WHITESPACES = PLUS(WHITESPACE)
+
+val ALL = ALT(ALT(LETTER, DIGIT), WHITESPACE)
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
+
+
+// for classifying the strings that have been recognised
+abstract class Token
+
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case class T_IDENT(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(n: Int) extends Token
+case class T_KEYWORD(s: String) extends Token
+
+
+// an example list of syntactic rules
+type Rule = (Rexp, List[Char] => Token)
+
+val rules: List[Rule] =
+ List(("if", (s) => T_KEYWORD(s.mkString)),
+ ("then", (s) => T_KEYWORD(s.mkString)),
+ ("else", (s) => T_KEYWORD(s.mkString)),
+ ("+", (s) => T_OP(s.mkString)),
+ (IDENT, (s) => T_IDENT(s.mkString)),
+ (NUMBER, (s) => T_NUM(s.mkString.toInt)),
+ (WHITESPACES, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
+
+
+def error (s: String) = throw new IllegalArgumentException ("Cannot tokenize: " + s)
+
+def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] =
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, action(t))
+ case Nil => None
+ case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
+ case c::s if (no_more(der (c, r))) => None
+ case c::s => munch(der (c, r), action, s, t ::: List(c))
+ }
+
+def one_token (rs: List[Rule], s: List[Char]) : (List[Char], Token) = {
+ val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
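+
+// note: sortBy is stable, so among munches of equal length the rule listed
+// first wins — this is why the keyword rules come before IDENT above,
+// making "if" a T_KEYWORD rather than a T_IDENT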
+
+def tokenize (rs: List[Rule], s: List[Char]) : List[Token] = s match {
+ case Nil => Nil
+ case _ => one_token(rs, s) match {
+ case (rest, token) => token :: tokenize(rs, rest)
+ }
+}
+
+//examples
+println(tokenize(rules, "if true then then 42 else +".toList))
+println(tokenize(rules, "if+true+then+then+42+else +".toList))
+println(tokenize(rules, "ifff if 34 34".toList))
+println(tokenize(rules, "/*ifff if */ hhjj /*34 */".toList))
+println(tokenize(rules, "/* if true then */ then 42 else +".toList))
+//println(tokenize(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/regexp4.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,168 @@
+// regular expressions
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// recognises nothing
+def zeroable (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
+ case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
+ case STAR(_) => false
+ case NOT(r) => !(zeroable(r))
+}
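+
+// (beware: the NOT clause is only an approximation — NOT(CHAR('a'))
+// recognises for instance the empty string, yet zeroable reports it
+// as recognising nothing)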
+
+def starts_with (r: Rexp, c: Char) : Boolean = r match {
+ case NULL => false
+ case EMPTY => false
+ case CHAR(d) => (c == d)
+ case ALT(r1, r2) => starts_with(r1, c) || starts_with(r2, c)
+ case SEQ(r1, r2) => if (nullable(r1)) (starts_with(r1, c) || starts_with(r2, c))
+ else starts_with(r1, c)
+ case STAR(r) => starts_with(r, c)
+ case NOT(r) => !(starts_with(r, c))
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+//one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+
+//some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGITS = RANGE("0123456789".toList)
+val NONZERODIGITS = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS)))
+val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+val SYMBOLS = RANGE("/*".toList)
+
+val ALL = ALT(ALT(ALT(LETTER, DIGITS), WHITESPACE), SYMBOLS)
+
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
+
+println(matcher(NUMBER, "0"))
+println(matcher(NUMBER, "01"))
+println(matcher(NUMBER, "123450"))
+
+println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
+println(matcher(ALT(STAR("a"), STAR("b")), ""))
+println(matcher("abc", ""))
+println(matcher(STAR(ALT(EMPTY, "a")), ""))
+println(matcher(STAR(EMPTY), "a"))
+println(matcher("cab","cab"))
+println(matcher(STAR("a"),"aaa"))
+println(matcher("cab" ,"cab"))
+println(matcher(STAR("a"),"aaa"))
+
+println(matcher(COMMENT, "/* */"))
+println(matcher(COMMENT, "/* foobar comment */"))
+println(matcher(COMMENT, "/* test */ test */"))
+
+// an example list of regular expressions
+val regs: List[Rexp] = List("if", "then", "else", "+", IDENT, NUMBER, COMMENT, WHITESPACE)
+
+
+def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
+
+def munch(r: Rexp, s: List[Char], t: List[Char]) : Option[(List[Char], List[Char])] =
+ if (zeroable(r)) None else s match {
+ case Nil => if (nullable(r)) Some(Nil, t) else None
+ case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, t)
+ //case c::s if (zeroable(der (c, r))) => None
+ case c::s => munch(der (c, r), s, t ::: List(c))
+}
+
+
+def lex_one (regs: List[Rexp], s: List[Char]) : (List[Char], List[Char]) = {
+ val somes = regs.map { munch(_, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
+
+def lex_all (regs: List[Rexp], s: List[Char]) : List[String] = s match {
+ case Nil => Nil
+ case _ => lex_one(regs, s) match {
+ case (rest, s) => s.mkString :: lex_all(regs, rest)
+ }
+}
+
+
+
+starts_with(der('/', COMMENT), '*')
+
+munch(COMMENT, "/*ifff if 34 */".toList, Nil)
+val COMMENT2 = NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))
+
+der('a', COMMENT2)
+zeroable(der('a', COMMENT2))
+
+matcher(COMMENT2, "ifff if 34")
+munch(COMMENT2, "ifff if 34".toList, Nil)
+starts_with(COMMENT2, 'i')
+lex_all(regs, "ifff if 34".toList)
+//lex_all(regs, "ifff $ if 34".toList) // causes an error because of the symbol $
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/regexp5.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,177 @@
+// regular expressions
+abstract class Rexp
+
+case object NULL extends Rexp
+case object EMPTY extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+ case Nil => EMPTY
+ case c::Nil => CHAR(c)
+ case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+ case NULL => false
+ case EMPTY => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+ case NOT(r) => !(nullable(r))
+}
+
+// tests whether a regular expression
+// recognises nothing
+def zeroable (r: Rexp) : Boolean = r match {
+ case NULL => true
+ case EMPTY => false
+ case CHAR(_) => false
+ case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
+ case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1)
+ //zeroable(r1) || zeroable(r2)
+ case STAR(_) => false
+ case NOT(r) => !(zeroable(r))
+}
+
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case NULL => NULL
+ case EMPTY => NULL
+ case CHAR(d) => if (c == d) EMPTY else NULL
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r) => SEQ(der(c, r), STAR(r))
+ case NOT(r) => NOT(der (c, r))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders (s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, der(c, r))
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+// regular expression for specifying
+// ranges of characters
+def RANGE(s : List[Char]) : Rexp = s match {
+ case Nil => NULL
+ case c::Nil => CHAR(c)
+ case c::s => ALT(CHAR(c), RANGE(s))
+}
+
+//one or more
+def PLUS(r: Rexp) = SEQ(r, STAR(r))
+
+
+//some regular expressions
+val LOWERCASE = RANGE("abcdefghijklmnopqrstuvwxyz".toList)
+val UPPERCASE = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZ".toList)
+val LETTER = ALT(LOWERCASE, UPPERCASE)
+val DIGITS = RANGE("0123456789".toList)
+val NONZERODIGITS = RANGE("123456789".toList)
+
+val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS)))
+val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0")
+val WHITESPACE = RANGE(" \n".toList)
+
+val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE)
+
+val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/")
+
+println(matcher(NUMBER, "0"))
+println(matcher(NUMBER, "01"))
+println(matcher(NUMBER, "123450"))
+
+println(matcher(SEQ(STAR("a"), STAR("b")), "bbaaa"))
+println(matcher(ALT(STAR("a"), STAR("b")), ""))
+println(matcher("abc", ""))
+println(matcher(STAR(ALT(EMPTY, "a")), ""))
+println(matcher(STAR(EMPTY), "a"))
+println(matcher("cab","cab"))
+println(matcher(STAR("a"),"aaa"))
+println(matcher("cab" ,"cab"))
+println(matcher(STAR("a"),"aaa"))
+
+println(matcher(COMMENT, "/* */"))
+println(matcher(COMMENT, "/* 34 */"))
+println(matcher(COMMENT, "/* foobar comment */"))
+println(matcher(COMMENT, "/* test */ test */"))
+
+// an example list of regular expressions
+
+abstract class Token
+
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case class T_IDENT(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(n: Int) extends Token
+case class T_KEYWORD(s: String) extends Token
+
+val regs: List[Rexp] = List("if", "then", "else", "+", IDENT, NUMBER, WHITESPACE)
+
+type Rule = (Rexp, List[Char] => Token)
+
+val rules: List[Rule] =
+ List(("if", (s) => T_KEYWORD(s.mkString)),
+ ("then", (s) => T_KEYWORD(s.mkString)),
+ ("else", (s) => T_KEYWORD(s.mkString)),
+ ("+", (s) => T_OP(s.mkString)),
+ (IDENT, (s) => T_IDENT(s.mkString)),
+ (NUMBER, (s) => T_NUM(s.mkString.toInt)),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
+
+
+def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s)
+
+def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] =
+{ println("string " + s)
+ println(" rexp " + r)
+ s match {
+ case Nil if (nullable(r)) => Some(Nil, action(t))
+ case Nil => { println("1"); None }
+ case c::s if (zeroable(der (c, r)) && nullable(r)) => Some(c::s, action(t))
+ case c::s if (zeroable(der (c, r))) => { println("2"); None }
+ case c::s => munch(der (c, r), action, s, t ::: List(c))
+ }
+}
+
+def lex_one (rs: List[Rule], s: List[Char]) : (List[Char], Token) = {
+ val somes = rs.map { (r) => munch(r._1, r._2, s, Nil) } .flatten
+ if (somes == Nil) error(s.mkString) else (somes sortBy (_._1.length) head)
+}
+
+def lex_all (rs: List[Rule], s: List[Char]) : List[Token] = s match {
+ case Nil => Nil
+ case _ => lex_one(rs, s) match {
+ case (rest, t) => t :: lex_all(rs, rest)
+ }
+}
+
+
+
+println(matcher(COMMENT, "/*ifff if 34 34*/"))
+rules.map { (r) => munch(r._1, r._2, "/*ifff if 34 34*/ ".toList, Nil) }
+println(lex_all(rules, "ifff if 34 34".toList))
+println(lex_all(rules, " /*ifff if 34 34*/ ".toList))
+//println(lex_all(rules, "ifff $ if 34".toList)) // causes an error because of the symbol $
+
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/scraper.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,57 @@
+import java.io.OutputStreamWriter
+import java.net.URL
+import scala.io.Source.fromInputStream
+
+val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp")
+
+//connect to url
+val conn = url.openConnection
+conn.setRequestProperty("User-Agent", "")
+conn.setDoOutput(true)
+conn.connect
+
+//sending data
+val wr = new OutputStreamWriter(conn.getOutputStream())
+//wr.write("Fdate=2012-9-24&Tdate=2012-09-25")
+//wr.write("Fdate=2012-9-18&Tdate=2012-09-25")
+wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
+wr.flush
+wr.close
+
+//receiving data
+val page = fromInputStream(conn.getInputStream).getLines.mkString("\n")
+
+//println(page)
+
+// in regular expressions, . does not match newlines,
+// therefore we have to use [\S\s]
+val regex1 = """<tr align="center">[\S\s]*?</tr>""".r
+val rows = regex1.findAllIn(page).toList
+
+//print(rows)
+
+val regex2 = """<td align="center">([\S\s]*?)</td>""".r
+
+def aux(s: String) : Array[String] = {
+ for (m <- regex2.findAllIn(s).toArray) yield m match {
+ case regex2(value) => value.trim
+ }
+}
+
+val data = rows.map { aux }
+
+def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt
+
+//day with highest particle pollution (PM_10)
+data.sortWith(compare(1)).last
+
+//day with highest sulfur dioxide (SO_2)
+data.sortWith(compare(2)).last
+
+//day with highest nitrogen dioxide (NO_2)
+data.sortWith(compare(3)).last
+
+//days with highest PM_10
+val groups = data.groupBy(_(1).toInt)
+val max_key = groups.keySet.max
+groups(max_key)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/while.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,231 @@
+// A parser and evaluator for the while language
+//
+import matcher._
+import parser._
+
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[(Rexp, List[Char] => Token)] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
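+
+// for example, the statement  x := x + 1  corresponds to the tree
+// Assign("x", Aop("+", Var("x"), Num(1)))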
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
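+
+// note: the grammar is right-recursive, so "+" and "-" associate to the
+// right (e.g. 5 - 2 - 1 parses as 5 - (2 - 1)); the T/F layering gives
+// "*" a higher precedence than "+" and "-"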
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
+ (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } ||
+ (T_KWD("true") ==> ((_) => True)) ||
+ (T_KWD("false") ==> ((_) => False: BExp))
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+
+// examples
+val p1 = "x := 5"
+val p1_toks = Tok.fromString(p1)
+val p1_ast = Block.parse_all(p1_toks)
+println(p1_toks)
+println(p1_ast)
+
+val p1a = "{ x := 5; y := 8}"
+val p1a_toks = Tok.fromString(p1a)
+val p1a_ast = Block.parse_all(p1a_toks)
+println(p1a_ast)
+
+val p2 = "5 = 6"
+val p2_toks = Tok.fromString(p2)
+val p2_ast = BExp.parse_all(p2_toks)
+println(p2_ast)
+
+val p2a = "true"
+val p2a_toks = Tok.fromString(p2a)
+val p2a_ast = BExp.parse_all(p2a_toks)
+println(p2a_ast)
+
+val p3 = "if true then skip else skip"
+val p3_toks = Tok.fromString(p3)
+val p3_ast = Stmt.parse_all(p3_toks)
+println(p3_ast)
+
+val p3a = "if true then x := 5 else x := 10"
+val p3a_toks = Tok.fromString(p3a)
+val p3a_ast = Stmt.parse_all(p3a_toks)
+println(p3a_ast)
+
+val p3b = "if false then x := 5 else x := 10"
+val p3b_toks = Tok.fromString(p3b)
+val p3b_ast = Stmt.parse_all(p3b_toks)
+println(p3b_ast)
+
+// multiplication
+val p4 = """{ x := 5;
+ y := 4;
+ r := 0;
+ while y > 0 do {
+ r := r + x;
+ y := y - 1
+ }
+ }"""
+val p4_toks = Tok.fromString(p4)
+val p4_ast = Block.parse_all(p4_toks)
+println(p4_ast)
+
+val p5 = """
+ n := 9;
+ minus1 := 0;
+ minus2 := 1;
+ temp := 0;
+ while n > 0 do {
+ temp := minus2;
+ minus2 := minus1 + minus2;
+ minus1 := temp;
+ n := n - 1
+ };
+ fib_res := minus2
+"""
+val p5_toks = Tok.fromString(p5)
+val p5_ast = Stmts.parse_all(p5_toks)
+
+// interpreter
+type Env = Map[String, Int]
+
+def eval_bexp(b: BExp, env: Env) : Boolean = b match {
+ case True => true
+ case False => false
+ case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
+ case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
+ case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
+ case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
+}
+
+def eval_aexp(a: AExp, env : Env) : Int = a match {
+ case Num(i) => i
+ case Var(s) => env(s)
+ case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
+ case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
+ case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
+}
+
+def eval_stmt(s: Stmt, env: Env) : Env = s match {
+ case Skip => env
+ case Assign(x, a) => env + (x -> eval_aexp(a, env))
+ case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
+ case While(b, bl) =>
+ if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
+ else env
+}
+
+def eval_bl(bl: Block, env: Env) : Env = bl match {
+ case Nil => env
+ case s::bl => eval_bl(bl, eval_stmt(s, env))
+}
+
+//examples
+println(eval_stmt(p3a_ast.head, Map.empty))
+println(eval_stmt(p3b_ast.head, Map.empty))
+println(eval_bl(p4_ast.head, Map.empty))
+println(eval_bl(p5_ast.head, Map.empty))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/scala/while1.scala Sat Jun 15 09:11:11 2013 -0400
@@ -0,0 +1,220 @@
+// A parser and evaluator for the WHILE language
+//
+import matcher._
+import parser._
+
+
+// some regular expressions
+val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+val DIGIT = RANGE("0123456789")
+val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
+val NUM = PLUS(DIGIT)
+val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
+val SEMI: Rexp = ";"
+val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
+val WHITESPACE = PLUS(RANGE(" \n"))
+val RPAREN: Rexp = ")"
+val LPAREN: Rexp = "("
+val BEGIN: Rexp = "{"
+val END: Rexp = "}"
+val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
+
+// tokens for classifying the strings that have been recognised
+abstract class Token
+case object T_WHITESPACE extends Token
+case object T_COMMENT extends Token
+case object T_SEMI extends Token
+case object T_LPAREN extends Token
+case object T_RPAREN extends Token
+case object T_BEGIN extends Token
+case object T_END extends Token
+case class T_ID(s: String) extends Token
+case class T_OP(s: String) extends Token
+case class T_NUM(s: String) extends Token
+case class T_KWD(s: String) extends Token
+
+val lexing_rules: List[(Rexp, List[Char] => Token)] =
+ List((KEYWORD, (s) => T_KWD(s.mkString)),
+ (ID, (s) => T_ID(s.mkString)),
+ (OP, (s) => T_OP(s.mkString)),
+ (NUM, (s) => T_NUM(s.mkString)),
+ (SEMI, (s) => T_SEMI),
+ (LPAREN, (s) => T_LPAREN),
+ (RPAREN, (s) => T_RPAREN),
+ (BEGIN, (s) => T_BEGIN),
+ (END, (s) => T_END),
+ (WHITESPACE, (s) => T_WHITESPACE),
+ (COMMENT, (s) => T_COMMENT))
+
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
+
+// the abstract syntax trees
+abstract class Stmt
+abstract class AExp
+abstract class BExp
+type Block = List[Stmt]
+case object Skip extends Stmt
+case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
+case class While(b: BExp, bl: Block) extends Stmt
+case class Assign(s: String, a: AExp) extends Stmt
+case class Write(s: String) extends Stmt
+
+case class Var(s: String) extends AExp
+case class Num(i: Int) extends AExp
+case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
+
+case object True extends BExp
+case object False extends BExp
+case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
+
+// atomic parsers
+case class TokParser(tok: Token) extends Parser[List[Token], Token] {
+ def parse(ts: List[Token]) = ts match {
+ case t::ts if (t == tok) => Set((t, ts))
+ case _ => Set ()
+ }
+}
+implicit def token2tparser(t: Token) = TokParser(t)
+
+case object NumParser extends Parser[List[Token], Int] {
+ def parse(ts: List[Token]) = ts match {
+ case T_NUM(s)::ts => Set((s.toInt, ts))
+ case _ => Set ()
+ }
+}
+
+case object IdParser extends Parser[List[Token], String] {
+ def parse(ts: List[Token]) = ts match {
+ case T_ID(s)::ts => Set((s, ts))
+ case _ => Set ()
+ }
+}
+
+
+// arithmetic expressions
+lazy val AExp: Parser[List[Token], AExp] =
+ (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
+ (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
+lazy val T: Parser[List[Token], AExp] =
+ (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
+lazy val F: Parser[List[Token], AExp] =
+ (T_LPAREN ~> AExp <~ T_RPAREN) ||
+ IdParser ==> Var ||
+ NumParser ==> Num
+
+// boolean expressions
+lazy val BExp: Parser[List[Token], BExp] =
+ (T_KWD("true") ==> ((_) => True: BExp)) ||
+ (T_KWD("false") ==> ((_) => False: BExp)) ||
+ (T_LPAREN ~> BExp <~ T_RPAREN) ||
+ (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
+ (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
+ (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
+ (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp }
+
+lazy val Stmt: Parser[List[Token], Stmt] =
+ (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
+ (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
+ (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
+ { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
+ (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
+ (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
+
+lazy val Stmts: Parser[List[Token], Block] =
+ (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
+ (Stmt ==> ((s) => List(s) : Block))
+
+lazy val Block: Parser[List[Token], Block] =
+ (T_BEGIN ~> Stmts <~ T_END) ||
+ (Stmt ==> ((s) => List(s)))
+
+// interpreter
+type Env = Map[String, Int]
+
+def eval_bexp(b: BExp, env: Env) : Boolean = b match {
+ case True => true
+ case False => false
+ case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
+ case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
+ case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
+}
+
+def eval_aexp(a: AExp, env : Env) : Int = a match {
+ case Num(i) => i
+ case Var(s) => env(s)
+ case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
+ case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
+ case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
+}
+
+def eval_stmt(s: Stmt, env: Env) : Env = s match {
+ case Skip => env
+ case Assign(x, a) => env + (x -> eval_aexp(a, env))
+ case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
+ case While(b, bl) =>
+ if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
+ else env
+ case Write(x) => { println(env(x)); env }
+}
+
+def eval_bl(bl: Block, env: Env) : Env = bl match {
+ case Nil => env
+ case s::bl => eval_bl(bl, eval_stmt(s, env))
+}
+
+def eval_prog(name: String) : Env = {
+ val tks = Tok.fromFile(name)
+ val ast = Stmts.parse_single(tks)
+ eval_bl(ast, Map.empty)
+}
+
+
+//examples
+
+//eval_prog("loops.while")
+eval_prog("fib.while")
+
+
+def time_needed[T](i: Int, code: => T) = {
+ val start = System.nanoTime()
+ for (j <- 1 to i) code
+ val end = System.nanoTime()
+ (end - start)/(i * 1.0e9)
+}
+
+
+val test_prog = """
+start := XXX;
+x := start;
+y := start;
+z := start;
+while 0 < x do {
+ while 0 < y do {
+ while 0 < z do {
+ z := z - 1
+ };
+ z := start;
+ y := y - 1
+ };
+ y := start;
+ x := x - 1
+}
+"""
+
+
+
+def eval_test(n: Int) : Unit = {
+ val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
+ val ast = Stmts.parse_single(tks)
+ println(n + " " + time_needed(2, eval_bl(ast, Map.empty)))
+}
+
+List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).map(eval_test(_))
+
+
+
+
+
+
+
--- a/scraper.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-import java.io.OutputStreamWriter
-import java.net.URL
-import scala.io.Source.fromInputStream
-
-val url = new URL("http://www.envir.gov.cn/eng/airep/index.asp")
-
-//connect to url
-val conn = url.openConnection
-conn.setRequestProperty("User-Agent", "")
-conn.setDoOutput(true)
-conn.connect
-
-//sending data
-val wr = new OutputStreamWriter(conn.getOutputStream())
-//wr.write("Fdate=2012-9-24&Tdate=2012-09-25")
-//wr.write("Fdate=2012-9-18&Tdate=2012-09-25")
-wr.write("Fdate=2001-5-18&Tdate=2012-09-25")
-wr.flush
-wr.close
-
-//receiving data
-val page = fromInputStream(conn.getInputStream).getLines.mkString("\n")
-
-//println(page)
-
-// regular expression . excludes newlines,
-// therefore we have to use [\S\s]
-val regex1 = """<tr align="center">[\S\s]*?</tr>""".r
-val rows = regex1.findAllIn(page).toList
-
-//print(rows)
-
-val regex2 = """<td align="center">([\S\s]*?)</td>""".r
-
-def aux(s: String) : Array[String] = {
- for (m <- regex2.findAllIn(s).toArray) yield m match {
- case regex2(value) => value.trim
- }
-}
-
-val data = rows.map { aux }
-
-def compare(i: Int)(e: Array[String], f: Array[String]) = e(i).toInt < f(i).toInt
-
-//day with highest particle pollution (PM_10)
-data.sortWith(compare(1)).last
-
-//day with highest sulfur dioxide (SO_2)
-data.sortWith(compare(2)).last
-
-//day with highest nitro dioxide (NO_2)
-data.sortWith(compare(3)).last
-
-//days with highest PM_10
-val groups = data.groupBy(_(1).toInt)
-val max_key = groups.keySet.max
-groups(max_key)
--- a/while.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,231 +0,0 @@
-// A parser and evaluator for teh while language
-//
-import matcher._
-import parser._
-
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[(Rexp, List[Char] => Token)] =
- List((KEYWORD, (s) => T_KWD(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (OP, (s) => T_OP(s.mkString)),
- (NUM, (s) => T_NUM(s.mkString)),
- (SEMI, (s) => T_SEMI),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (BEGIN, (s) => T_BEGIN),
- (END, (s) => T_END),
- (WHITESPACE, (s) => T_WHITESPACE))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token) = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
- }
-}
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
- (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
- (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
- (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
- (T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> Var ||
- NumParser ==> Num
-
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
- (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
- (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
- (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop(">", x, z): BExp } ||
- (T_KWD("true") ==> ((_) => True)) ||
- (T_KWD("false") ==> ((_) => False: BExp))
-
-lazy val Stmt: Parser[List[Token], Stmt] =
- (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
- (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
- { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
- (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) }
-
-lazy val Stmts: Parser[List[Token], Block] =
- (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
- (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
- (T_BEGIN ~> Stmts <~ T_END) ||
- (Stmt ==> ((s) => List(s)))
-
-
-// examples
-val p1 = "x := 5"
-val p1_toks = Tok.fromString(p1)
-val p1_ast = Block.parse_all(p1_toks)
-println(p1_toks)
-println(p1_ast)
-
-val p1a = "{ x := 5; y := 8}"
-val p1a_toks = Tok.fromString(p1a)
-val p1a_ast = Block.parse_all(p1a_toks)
-println(p1a_ast)
-
-val p2 = "5 = 6"
-val p2_toks = Tok.fromString(p2)
-val p2_ast = BExp.parse_all(p2_toks)
-println(p2_ast)
-
-val p2a = "true"
-val p2a_toks = Tok.fromString(p2a)
-val p2a_ast = BExp.parse_all(p2a_toks)
-println(p2a_ast)
-
-val p3 = "if true then skip else skip"
-val p3_toks = Tok.fromString(p3)
-val p3_ast = Stmt.parse_all(p3_toks)
-println(p3_ast)
-
-val p3a = "if true then x := 5 else x := 10"
-val p3a_toks = Tok.fromString(p3a)
-val p3a_ast = Stmt.parse_all(p3a_toks)
-println(p3a_ast)
-
-val p3b = "if false then x := 5 else x := 10"
-val p3b_toks = Tok.fromString(p3b)
-val p3b_ast = Stmt.parse_all(p3b_toks)
-println(p3b_ast)
-
-// multiplication: computes r := x * y by repeated addition
-val p4 = """{ x := 5;
- y := 4;
- r := 0;
- while y > 0 do {
- r := r + x;
- y := y - 1
- }
- }"""
-val p4_toks = Tok.fromString(p4)
-val p4_ast = Block.parse_all(p4_toks)
-println(p4_ast)
-
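-// Fibonacci: the loop maintains (minus1, minus2) = (fib(k), fib(k+1)),
-// so with n := 9 it finishes with fib_res = fib(10) = 55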
-val p5 = """
- n := 9;
- minus1 := 0;
- minus2 := 1;
- temp := 0;
- while n > 0 do {
- temp := minus2;
- minus2 := minus1 + minus2;
- minus1 := temp;
- n := n - 1
- };
- fib_res := minus2
-"""
-val p5_toks = Tok.fromString(p5)
-val p5_ast = Stmts.parse_all(p5_toks)
-
-// interpreter
-type Env = Map[String, Int]
-
-def eval_bexp(b: BExp, env: Env) : Boolean = b match {
- case True => true
- case False => false
- case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
- case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
- case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
- case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
-}
-
-def eval_aexp(a: AExp, env : Env) : Int = a match {
- case Num(i) => i
- case Var(s) => env(s)
- case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
- case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
- case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
-}
-
-def eval_stmt(s: Stmt, env: Env) : Env = s match {
- case Skip => env
- case Assign(x, a) => env + (x -> eval_aexp(a, env))
- case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
- case While(b, bl) =>
- if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
- else env
-}
-
-def eval_bl(bl: Block, env: Env) : Env = bl match {
- case Nil => env
- case s::bl => eval_bl(bl, eval_stmt(s, env))
-}
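-
-// eval_bl threads the environment through a block from left to right; it is
-// the same as bl.foldLeft(env)((e, s) => eval_stmt(s, e))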
-
-// examples
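-// evaluating the parsed programs; these should print Map(x -> 5),
-// Map(x -> 10), Map(x -> 5, y -> 0, r -> 20) and an environment
-// containing fib_res -> 55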
-println(eval_stmt(p3a_ast.head, Map.empty))
-println(eval_stmt(p3b_ast.head, Map.empty))
-println(eval_bl(p4_ast.head, Map.empty))
-println(eval_bl(p5_ast.head, Map.empty))
--- a/while1.scala Sun Dec 23 00:38:56 2012 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-// A parser and evaluator for the WHILE language
-//
-import matcher._
-import parser._
-
-
-// some regular expressions
-val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
-val DIGIT = RANGE("0123456789")
-val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
-val NUM = PLUS(DIGIT)
-val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
-val SEMI: Rexp = ";"
-val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
-val WHITESPACE = PLUS(RANGE(" \n"))
-val RPAREN: Rexp = ")"
-val LPAREN: Rexp = "("
-val BEGIN: Rexp = "{"
-val END: Rexp = "}"
-val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
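-// comments are /* ... */ where the body may not contain a closing */;
-// this is expressed as the complement (NOT) of "any string containing */"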
-
-// tokens for classifying the strings that have been recognised
-abstract class Token
-case object T_WHITESPACE extends Token
-case object T_COMMENT extends Token
-case object T_SEMI extends Token
-case object T_LPAREN extends Token
-case object T_RPAREN extends Token
-case object T_BEGIN extends Token
-case object T_END extends Token
-case class T_ID(s: String) extends Token
-case class T_OP(s: String) extends Token
-case class T_NUM(s: String) extends Token
-case class T_KWD(s: String) extends Token
-
-val lexing_rules: List[(Rexp, List[Char] => Token)] =
- List((KEYWORD, (s) => T_KWD(s.mkString)),
- (ID, (s) => T_ID(s.mkString)),
- (OP, (s) => T_OP(s.mkString)),
- (NUM, (s) => T_NUM(s.mkString)),
- (SEMI, (s) => T_SEMI),
- (LPAREN, (s) => T_LPAREN),
- (RPAREN, (s) => T_RPAREN),
- (BEGIN, (s) => T_BEGIN),
- (END, (s) => T_END),
- (WHITESPACE, (s) => T_WHITESPACE),
- (COMMENT, (s) => T_COMMENT))
-
-// the tokenizer
-val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
-
-// the abstract syntax trees
-abstract class Stmt
-abstract class AExp
-abstract class BExp
-type Block = List[Stmt]
-case object Skip extends Stmt
-case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
-case class While(b: BExp, bl: Block) extends Stmt
-case class Assign(s: String, a: AExp) extends Stmt
-case class Write(s: String) extends Stmt
-
-case class Var(s: String) extends AExp
-case class Num(i: Int) extends AExp
-case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
-
-case object True extends BExp
-case object False extends BExp
-case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
-
-// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
- case _ => Set ()
- }
-}
-implicit def token2tparser(t: Token): TokParser = TokParser(t)
-
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(s)::ts => Set((s.toInt, ts))
- case _ => Set ()
- }
-}
-
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
- }
-}
-
-
-// arithmetic expressions
-lazy val AExp: Parser[List[Token], AExp] =
- (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
- (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
-lazy val T: Parser[List[Token], AExp] =
- (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
-lazy val F: Parser[List[Token], AExp] =
- (T_LPAREN ~> AExp <~ T_RPAREN) ||
- IdParser ==> Var ||
- NumParser ==> Num
-
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (T_KWD("true") ==> ((_) => True: BExp)) ||
- (T_KWD("false") ==> ((_) => False: BExp)) ||
- (T_LPAREN ~> BExp <~ T_RPAREN) ||
- (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Bop("=", x, z): BExp } ||
- (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Bop("!=", x, z): BExp } ||
- (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Bop("<", x, z): BExp } ||
- (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Bop("<", z, x): BExp }
-
-lazy val Stmt: Parser[List[Token], Stmt] =
- (T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
- (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
- { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
- (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
- (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
-
-lazy val Stmts: Parser[List[Token], Block] =
- (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
- (Stmt ==> ((s) => List(s) : Block))
-
-lazy val Block: Parser[List[Token], Block] =
- (T_BEGIN ~> Stmts <~ T_END) ||
- (Stmt ==> ((s) => List(s)))
-
-// interpreter
-type Env = Map[String, Int]
-
-def eval_bexp(b: BExp, env: Env) : Boolean = b match {
- case True => true
- case False => false
- case Bop("=", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
- case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
- case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
-}
-
-def eval_aexp(a: AExp, env : Env) : Int = a match {
- case Num(i) => i
- case Var(s) => env(s)
- case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
- case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
- case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
-}
-
-def eval_stmt(s: Stmt, env: Env) : Env = s match {
- case Skip => env
- case Assign(x, a) => env + (x -> eval_aexp(a, env))
- case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
- case While(b, bl) =>
- if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
- else env
- case Write(x) => { println(env(x)); env }
-}
-
-def eval_bl(bl: Block, env: Env) : Env = bl match {
- case Nil => env
- case s::bl => eval_bl(bl, eval_stmt(s, env))
-}
-
-def eval_prog(name: String) : Env = {
- val tks = Tok.fromFile(name)
- val ast = Stmts.parse_single(tks)
- eval_bl(ast, Map.empty)
-}
-
-
-// examples
-
-//eval_prog("loops.while")
-eval_prog("fib.while")
-
-
-def time_needed[T](i: Int, code: => T) = {
- val start = System.nanoTime()
- for (j <- 1 to i) code
- val end = System.nanoTime()
- (end - start)/(i * 1.0e9)
-}
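-
-// time_needed runs the code i times and returns the average wall-clock
-// time per run in seconds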
-
-
-val test_prog = """
-start := XXX;
-x := start;
-y := start;
-z := start;
-while 0 < x do {
- while 0 < y do {
- while 0 < z do {
- z := z - 1
- };
- z := start;
- y := y - 1
- };
- y := start;
- x := x - 1
-}
-"""
-
-
-
-def eval_test(n: Int) : Unit = {
- val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
- val ast = Stmts.parse_single(tks)
- println(n + " " + time_needed(2, eval_bl(ast, Map.empty)))
-}
-
-List(1, 200, 400, 600, 800, 1000, 1200, 1400, 1600).foreach(eval_test)
-