# HG changeset patch # User updated # Date 1544150330 0 # Node ID 046f37a262d0fab1616b5355c52e32d79e20d3e5 # Parent db4d2fcd8063efddb4918b949188d6e56dbe190a updated diff -r db4d2fcd8063 -r 046f37a262d0 progs/catastrophic.java --- a/progs/catastrophic.java Thu Dec 06 22:51:46 2018 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -// a case of catastrophic backtracking in Java -// -// regexp: (a*)*b -// strings: aa.... - -import java.util.regex.*; - -public class catastrophic { - public static void main(String[] args) { - - //we always run all the tests twice -> warmup of the JVM - for (int runs = 0; runs < 3; runs++) { - - Pattern pattern = Pattern.compile("(a*)*b"); - - // Run from 5 to 28 characters - for (int length = 70000; length < 70001; length++) { - - // Build input of specified length - String input = ""; - for (int i = 0; i < length; i++) { input += "a"; } - - // Measure the average duration of two calls... - long start = System.nanoTime(); - for (int i = 0; i < 2; i++) { - pattern.matcher(input).find(); - } - - System.out.println(length + " " + input + ": " - + ((System.nanoTime() - start) / 3000000000d) - + "s"); - } - } - } -} - - - -// javac catastrophic.java -// java catastrophic diff -r db4d2fcd8063 -r 046f37a262d0 progs/catastrophic.py --- a/progs/catastrophic.py Thu Dec 06 22:51:46 2018 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -import re -import sys - -# case of catastrophic backtracking in Python -# -# regex: (a*)*b -# strings: aa...a -# -# call with timing as: -# -# > time ./catastrophic.py 20 - -# counter n given on the command line -cn = sys.argv[1] - -# calling the matching function -s = ("a" * int(cn)) -m = re.match('(a*)*b' , s) - -print s diff -r db4d2fcd8063 -r 046f37a262d0 progs/lecture5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/lecture5.scala Fri Dec 07 02:38:50 2018 +0000 @@ -0,0 +1,363 @@ +// Scala Lecture 5 +//================= + + + +// Laziness with style +//===================== + +// The concept of lazy evaluation doesn’t really exist in +// non-functional languages, but it is pretty easy to grasp. +// Consider first + +def square(x: Int) = x * x + +square(42 + 8) + +// this is called strict evaluation + +// pretty expensive operation +def peop(n: BigInt): Boolean = peop(n + 1) +val a = "foo" +val b = "foo" + +if (a == b || peop(0)) println("true") else println("false") + +// this is called lazy evaluation +// you delay compuation until it is really +// needed; once calculated though, does not +// need to be re-calculated + +// a useful example is +def time_needed[T](i: Int, code: => T) = { + val start = System.nanoTime() + for (j <- 1 to i) code + val end = System.nanoTime() + f"${(end - start) / (i * 1.0e9)}%.6f secs" +} + + +// streams (I do not care how many) +// primes: 2, 3, 5, 7, 9, 11, 13 .... + +def generatePrimes (s: Stream[Int]): Stream[Int] = + s.head #:: generatePrimes(s.tail.filter(_ % s.head != 0)) + +val primes: Stream[Int] = generatePrimes(Stream.from(2)) + +// the first 10 primes +primes.take(10).toList + +//primes.filter(_ > 100).take(2000).toList + +time_needed(1, primes.filter(_ > 100).take(3000).toList) +time_needed(1, primes.filter(_ > 100).take(3000).toList) + + +Stream.from(2) +Stream.from(2).take(10) +Stream.from(2).take(10).print +Stream.from(10).take(10).print + +Stream.from(2).take(10).force + +// itterative version of the Fibonacci numbers +def fibIter(a: BigInt, b: BigInt): Stream[BigInt] = + a #:: fibIter(b, a + b) + + +fibIter(1, 1).take(10).force +fibIter(8, 13).take(10).force + +fibIter(1, 1).drop(10000).take(1).print + + +// good for testing + + +// Regular expressions - the power of DSLs in Scala +// and Laziness +//================================================== + +abstract class Rexp +case object ZERO extends Rexp // nothing +case object ONE extends Rexp // the empty string +case class CHAR(c: Char) extends Rexp // a character c +case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative r1 + r2 +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence r1 . r2 +case class STAR(r: Rexp) extends Rexp // star r* + + + +// writing (ab)* in the format above is +// tedious +val r0 = STAR(SEQ(CHAR('a'), CHAR('b'))) + + +// some convenience for typing in regular expressions +import scala.language.implicitConversions +import scala.language.reflectiveCalls + +def charlist2rexp(s: List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::s => SEQ(CHAR(c), charlist2rexp(s)) +} +implicit def string2rexp(s: String): Rexp = + charlist2rexp(s.toList) + + +val r1 = STAR("ab") +val r2 = STAR(ALT("ab", "baa baa black sheep")) +val r3 = STAR(SEQ("ab", ALT("a", "b"))) + +implicit def RexpOps (r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def % = STAR(r) + def ~ (s: Rexp) = SEQ(r, s) +} + + +implicit def stringOps (s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def % = STAR(s) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) +} + + +def depth(r: Rexp) : Int = r match { + case ZERO => 0 + case ONE => 0 + case CHAR(_) => 0 + case ALT(r1, r2) => Math.max(depth(r1), depth(r2)) + 1 + case SEQ(r1, r2) => Math.max(depth(r1), depth(r2)) + 1 + case STAR(r1) => depth(r1) + 1 +} + +//example regular expressions +val digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" +val sign = "+" | "-" | "" +val number = sign ~ digit ~ digit.% + +// task: enumerate exhaustively regular expression +// starting from small ones towards bigger ones. + +// 1st idea: enumerate them up to a level + +def enuml(l: Int, s: String) : Set[Rexp] = l match { + case 0 => Set(ZERO, ONE) ++ s.map(CHAR).toSet + case n => + val rs = enuml(n - 1, s) + rs ++ + (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++ + (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++ + (for (r1 <- rs) yield STAR(r1)) +} + +enuml(1, "a").size +enuml(2, "a").size +enuml(3, "a").size // out of heap space + + +def enum(rs: Stream[Rexp]) : Stream[Rexp] = + rs #::: enum( (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #::: + (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #::: + (for (r1 <- rs) yield STAR(r1)) ) + + +enum(ZERO #:: ONE #:: "ab".toStream.map(CHAR)).take(200).force +enum(ZERO #:: ONE #:: "ab".toStream.map(CHAR)).take(200000).force + + +val is = + (enum(ZERO #:: ONE #:: "ab".toStream.map(CHAR)) + .dropWhile(depth(_) < 3) + .take(10).foreach(println)) + + + +// Parsing - The Solved Problem That Isn't +//========================================= +// +// https://tratt.net/laurie/blog/entries/parsing_the_solved_problem_that_isnt.html +// +// Or, A topic of endless "fun"(?) + + +// input type: String +// output type: Int +Integer.parseInt("123456") + +/* Note, in the previous lectures I did not show the type consraint + * I <% Seq[_] , which means that the input type I can be + * treated, or seen, as a sequence. */ + +abstract class Parser[I <% Seq[_], T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I) : Set[T] = + for ((head, tail) <- parse(ts); + if (tail.isEmpty)) yield head +} + +// the idea is that a parser can parse something +// from the input and leaves something unparsed => pairs + +class AltParser[I <% Seq[_], T]( + p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { + + def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +} + + +class SeqParser[I <% Seq[_], T, S]( + p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, (T, S)] { + + def parse(sb: I) = + for ((head1, tail1) <- p.parse(sb); + (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) +} + + +class FunParser[I <% Seq[_], T, S]( + p: => Parser[I, T], + f: T => S) extends Parser[I, S] { + + def parse(sb: I) = + for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +} + + +implicit def ParserOps[I<% Seq[_], T](p: Parser[I, T]) = new { + def | (q : => Parser[I, T]) = new AltParser[I, T](p, q) + def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) + def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) +} + +implicit def StringOps(s: String) = new { + def | (q : => Parser[String, String]) = new AltParser[String, String](s, q) + def | (r: String) = new AltParser[String, String](s, r) + def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f) + def ~[S] (q : => Parser[String, S]) = + new SeqParser[String, String, S](s, q) + def ~ (r: String) = + new SeqParser[String, String, String](s, r) +} + + +// atomic parsers +case class CharParser(c: Char) extends Parser[String, Char] { + def parse(sb: String) = + if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set() +} + +import scala.util.matching.Regex +case class RegexParser(reg: Regex) extends Parser[String, String] { + def parse(sb: String) = reg.findPrefixMatchOf(sb) match { + case None => Set() + case Some(m) => Set((m.matched, m.after.toString)) + } +} + +val NumParser = RegexParser("[0-9]+".r) +def StringParser(s: String) = RegexParser(Regex.quote(s).r) + +println(NumParser.parse_all("12345")) +println(NumParser.parse_all("12u45")) + + +// convenience +implicit def string2parser(s: String) = StringParser(s) +implicit def char2parser(c: Char) = CharParser(c) + +implicit def ParserOps[I<% Seq[_], T](p: Parser[I, T]) = new { + def | (q : => Parser[I, T]) = new AltParser[I, T](p, q) + def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) + def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) +} + +implicit def StringOps(s: String) = new { + def | (q : => Parser[String, String]) = new AltParser[String, String](s, q) + def | (r: String) = new AltParser[String, String](s, r) + def ==>[S] (f: => String => S) = new FunParser[String, String, S](s, f) + def ~[S] (q : => Parser[String, S]) = + new SeqParser[String, String, S](s, q) + def ~ (r: String) = + new SeqParser[String, String, String](s, r) +} + + +val NumParserInt = NumParser ==> (s => s.toInt) + +NumParser.parse_all("12345") +NumParserInt.parse_all("12345") +NumParserInt.parse_all("12u45") + + +// grammar for arithmetic expressions +// +// E ::= T + E | T - E | T +// T ::= F * T | F +// F ::= ( E ) | Number + + +lazy val E: Parser[String, Int] = + (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } | + (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } | T +lazy val T: Parser[String, Int] = + (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } | F +lazy val F: Parser[String, Int] = + ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParserInt + +println(E.parse_all("1+3+4")) +println(E.parse_all("4*2+3")) +println(E.parse_all("4*(2+3)")) +println(E.parse_all("(4)*((2+3))")) +println(E.parse_all("4/2+3")) +println(E.parse_all("(1+2)+3")) +println(E.parse_all("1+2+3")) + + + + + +// The End ... Almost Christimas +//=============================== + +// I hope you had fun! + +// A function should do one thing, and only one thing. + +// Make your variables immutable, unless there's a good +// reason not to. + +// I did it, but this is actually not a good reason: +// generating new labels +var counter = -1 + +def Fresh(x: String) = { + counter += 1 + x ++ "_" ++ counter.toString() +} + +Fresh("x") +Fresh("x") + + + +// You can be productive on Day 1, but the language is deep. +// +// http://scalapuzzlers.com +// +// http://www.latkin.org/blog/2017/05/02/when-the-scala-compiler-doesnt-help/ + +List(1, 2, 3) contains "your mom" + +// I like best about Scala that it lets me often write +// concise, readable code. And it hooks up with the +// Isabelle theorem prover. +