progs/lecture3.scala
changeset 218 22705d22c105
parent 217 e689375abcc1
child 223 c6453f3547ec
equal deleted inserted replaced
217:e689375abcc1 218:22705d22c105
     5 // A Web Crawler / Email Harvester
     5 // A Web Crawler / Email Harvester
     6 //=================================
     6 //=================================
     7 //
     7 //
     8 // the idea is to look for links using the
     8 // the idea is to look for links using the
     9 // regular expression "https?://[^"]*" and for
     9 // regular expression "https?://[^"]*" and for
    10 // email addresses using another regex.
    10 // email addresses using yet another regex.
    11 
    11 
    12 import io.Source
    12 import io.Source
    13 import scala.util._
    13 import scala.util._
    14 
    14 
    15 // gets the first 10K of a web-page
    15 // gets the first 10K of a web-page
    20 
    20 
    21 // regex for URLs and emails
    21 // regex for URLs and emails
    22 val http_pattern = """"https?://[^"]*"""".r
    22 val http_pattern = """"https?://[^"]*"""".r
    23 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
    23 val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
    24 
    24 
    25 //email_pattern.findAllIn
    25 //  val s = "foo bla christian@kcl.ac.uk 1234567"
    26 //  ("foo bla christian@kcl.ac.uk 1234567").toList
    26 //  email_pattern.findAllIn(s).toList
    27 
       
    28 
    27 
    29 // drops the first and last character from a string
    28 // drops the first and last character from a string
    30 def unquote(s: String) = s.drop(1).dropRight(1)
    29 def unquote(s: String) = s.drop(1).dropRight(1)
    31 
    30 
    32 def get_all_URLs(page: String): Set[String] = 
    31 def get_all_URLs(page: String): Set[String] = 
    33   http_pattern.findAllIn(page).map(unquote).toSet
    32   http_pattern.findAllIn(page).map(unquote).toSet
    34 
    33 
    35 // naive version of crawl - searches until a given depth,
    34 // naive version of crawl - searches until a given depth,
    36 // visits pages potentially more than once
    35 // visits pages potentially more than once
       
    36 
    37 def crawl(url: String, n: Int) : Set[String] = {
    37 def crawl(url: String, n: Int) : Set[String] = {
    38   if (n == 0) Set()
    38   if (n == 0) Set()
    39   else {
    39   else {
    40     println(s"  Visiting: $n $url")
    40     println(s"  Visiting: $n $url")
    41     val page = get_page(url)
    41     val page = get_page(url)
    42     val new_emails = email_pattern.findAllIn(page).toSet
    42     val new_emails = email_pattern.findAllIn(page).toSet
    43     new_emails ++ 
    43     new_emails ++ 
    44       (for (u <- get_all_URLs(page)) yield crawl(u, n - 1)).flatten
    44       (for (u <- get_all_URLs(page).par) yield crawl(u, n - 1)).flatten
    45   }
    45   }
    46 }
    46 }
    47 
    47 
    48 // some starting URLs for the crawler
    48 // some starting URLs for the crawler
    49 val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
    49 val startURL = """https://nms.kcl.ac.uk/christian.urban/"""
    50 
       
    51 crawl(startURL, 2)
    50 crawl(startURL, 2)
    52 
    51 
    53 
    52 
    54 
    53 
    55 // User-defined Datatypes and Pattern Matching
    54 // User-defined Datatypes and Pattern Matching
    56 //============================================
    55 //=============================================
    57 
       
    58 
    56 
    59 abstract class Exp
    57 abstract class Exp
    60 case class N(n: Int) extends Exp
    58 case class N(n: Int) extends Exp                  // for numbers
    61 case class Plus(e1: Exp, e2: Exp) extends Exp
    59 case class Plus(e1: Exp, e2: Exp) extends Exp
    62 case class Times(e1: Exp, e2: Exp) extends Exp
    60 case class Times(e1: Exp, e2: Exp) extends Exp
    63 
    61 
    64 
    62 def string(e: Exp) : String = e match {
    65 
    63   case N(n) => n.toString
    66 // string of an Exp
    64   case Plus(e1, e2) => "(" + string(e1) + " + " + string(e2) + ")" 
    67 // eval of an Exp
    65   case Times(e1, e2) => "(" + string(e1) + " * " + string(e2) + ")" 
    68 // simp an Exp
    66 }
    69 // Tokens
    67 
    70 // Reverse Polish Notation
    68 val e = Plus(N(9), Times(N(3), N(4)))
    71 // compute RP
    69 println(string(e))
    72 // transform RP into Exp
    70 
    73 // process RP string and generate Exp
    71 def eval(e: Exp) : Int = e match {
       
    72   case N(n) => n
       
    73   case Plus(e1, e2) => eval(e1) + eval(e2) 
       
    74   case Times(e1, e2) => eval(e1) * eval(e2) 
       
    75 }
       
    76 
       
    77 def simp(e: Exp) : Exp = e match {
       
    78   case N(n) => N(n)
       
    79   case Plus(e1, e2) => (simp(e1), simp(e2)) match {
       
    80     case (N(0), e2s) => e2s
       
    81     case (e1s, N(0)) => e1s
       
    82     case (e1s, e2s) => Plus(e1s, e2s)
       
    83   }  
       
    84   case Times(e1, e2) => (simp(e1), simp(e2)) match {
       
    85     case (N(0), _) => N(0)
       
    86     case (_, N(0)) => N(0)
       
    87     case (N(1), e2s) => e2s
       
    88     case (e1s, N(1)) => e1s
       
    89     case (e1s, e2s) => Times(e1s, e2s)
       
    90   }  
       
    91 }
       
    92 
       
    93 println(eval(e))
       
    94 
       
    95 val e2 = Times(Plus(N(0), N(1)), Plus(N(0), N(9)))
       
    96 println(string(e2))
       
    97 println(string(simp(e2)))
       
    98 
       
    99 // Tokens and Reverse Polish Notation
       
   100 abstract class Token
       
   101 case class T(n: Int) extends Token
       
   102 case object PL extends Token
       
   103 case object TI extends Token
       
   104 
       
   105 def rp(e: Exp) : List[Token] = e match {
       
   106   case N(n) => List(T(n))
       
   107   case Plus(e1, e2) => rp(e1) ::: rp(e2) ::: List(PL) 
       
   108   case Times(e1, e2) => rp(e1) ::: rp(e2) ::: List(TI) 
       
   109 }
       
   110 println(string(e2))
       
   111 println(rp(e2))
       
   112 
       
   113 def comp(ls: List[Token], st: List[Int]) : Int = (ls, st) match {
       
   114   case (Nil, st) => st.head 
       
   115   case (T(n)::rest, st) => comp(rest, n::st)
       
   116   case (PL::rest, n1::n2::st) => comp(rest, n1 + n2::st)
       
   117   case (TI::rest, n1::n2::st) => comp(rest, n1 * n2::st)
       
   118 }
       
   119 
       
   120 comp(rp(e), Nil)
       
   121 
       
   122 def proc(s: String) : Token = s match {
       
   123   case  "+" => PL
       
   124   case  "*" => TI
       
   125   case  _ => T(s.toInt) 
       
   126 }
       
   127 
       
   128 comp("1 2 + 4 * 5 + 3 +".split(" ").toList.map(proc), Nil)
       
   129 
    74 
   130 
    75 
   131 
    76 
   132 
    77 def string(e: Exp) : String = e match {
   133 def string(e: Exp) : String = e match {
    78   case N(n) => n.toString
   134   case N(n) => n.toString
   157 
   213 
   158 
   214 
   159 def fact(n: Long): Long = 
   215 def fact(n: Long): Long = 
   160   if (n == 0) 1 else n * fact(n - 1)
   216   if (n == 0) 1 else n * fact(n - 1)
   161 
   217 
       
   218 def factB(n: BigInt): BigInt = 
       
   219   if (n == 0) 1 else n * factB(n - 1)
       
   220 
       
   221 factB(100000)
       
   222 
   162 fact(10)              //ok
   223 fact(10)              //ok
   163 fact(10000)           // produces a stackoverflow
   224 fact(10000)           // produces a stackoverflow
   164 
   225 
   165 def factT(n: BigInt, acc: BigInt): BigInt =
   226 def factT(n: BigInt, acc: BigInt): BigInt =
   166   if (n == 0) acc else factT(n - 1, n * acc)
   227   if (n == 0) acc else factT(n - 1, n * acc)
   167 
   228 
   168 factT(10, 1)
   229 factT(10, 1)
   169 factT(100000, 1)
   230 println(factT(100000, 1))
   170 
   231 
   171 // there is an annotation for ensuring a function is tail recursive
   232 // there is an annotation for ensuring a function is tail recursive
   172 import scala.annotation.tailrec
   233 import scala.annotation.tailrec
   173 
   234 
   174 @tailrec
   235 @tailrec
   190 
   251 
   191 
   252 
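   192 // the first n suffixes of xs (xs itself, then its successive tails)
   253 // the first n suffixes of xs (xs itself, then its successive tails)
   193 // for n = 1 and a non-empty xs => only xs itself
   254 // for n = 1 and a non-empty xs => only xs itself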
   194 
   255 
       
   256 
       
   257 
   195 def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
   258 def moves(xs: List[Int], n: Int) : List[List[Int]] = (xs, n) match {
   196   case (Nil, _) => Nil
   259   case (Nil, _) => Nil
   197   case (xs, 0) => Nil
   260   case (xs, 0) => Nil
   198   case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
   261   case (x::xs, n) => (x::xs) :: moves(xs, n - 1)
   199 }
   262 }
   202 moves(List(5,1,0), 1)
   265 moves(List(5,1,0), 1)
   203 moves(List(5,1,0), 2)
   266 moves(List(5,1,0), 2)
   204 moves(List(5,1,0), 5)
   267 moves(List(5,1,0), 5)
   205 
   268 
   206 // checks whether a jump tour exists at all
   269 // checks whether a jump tour exists at all
   207 // in the second case it needs to be < instead of <=
       
   208 
   270 
   209 def search(xs: List[Int]) : Boolean = xs match {
   271 def search(xs: List[Int]) : Boolean = xs match {
   210   case Nil => true
   272   case Nil => true
   211   case (x::xs) =>
   273   case (x::xs) =>
   212     if (xs.length < x) true else moves(xs, x).exists(search(_))
   274     if (xs.length < x) true else moves(xs, x).exists(search(_))
   233 
   295 
   234 def jumps(xs: List[Int]) : List[List[Int]] = xs match {
   296 def jumps(xs: List[Int]) : List[List[Int]] = xs match {
   235   case Nil => Nil
   297   case Nil => Nil
   236   case (x::xs) => {
   298   case (x::xs) => {
   237     val children = moves(xs, x)
   299     val children = moves(xs, x)
   238     val results = children.flatMap((cs) => jumps(cs).map(x :: _))
   300     val results = children.map((cs) => jumps(cs).map(x :: _)).flatten
   239     if (xs.length < x) List(x) :: results else results
   301     if (xs.length < x) List(x) :: results else results
   240   }
   302   }
   241 }
   303 }
   242 
   304 
   243 
   305 println(jumps(List(5,3,2,5,1,1)).minBy(_.length))
   244 
       
   245 jumps(List(5,3,2,5,1,1))
       
   246 jumps(List(3,5,1,2,1,2,1))
   306 jumps(List(3,5,1,2,1,2,1))
   247 jumps(List(3,5,1,2,3,4,1))
   307 jumps(List(3,5,1,2,3,4,1))
   248 jumps(List(3,5,1,0,0,0,1))
   308 jumps(List(3,5,1,0,0,0,1))
   249 jumps(List(3,5,1))
   309 jumps(List(3,5,1))
   250 jumps(List(5,1,1))
   310 jumps(List(5,1,1))
   313     (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
   373     (x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
   314 }
   374 }
   315 
   375 
   316 //get_row(game0, 0)
   376 //get_row(game0, 0)
   317 //get_row(game0, 1)
   377 //get_row(game0, 1)
   318 //get_box(game0, (3,1))
   378 //get_col(game0, 0)
       
   379 //get_box(game0, (3, 1))
   319 
   380 
   320 
   381 
   321 // update does not mutate game: updated returns a new string
   382 // update does not mutate game: updated returns a new string
   322 def update(game: String, pos: Int, value: Char): String = 
   383 def update(game: String, pos: Int, value: Char): String = 
   323   game.updated(pos, value)
   384   game.updated(pos, value)
   324 
   385 
   325 def toAvoid(game: String, pos: Pos): List[Char] = 
   386 def toAvoid(game: String, pos: Pos): List[Char] = 
   326   (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))
   387   (get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))
   327 
   388 
   328 def candidates(game: String, pos: Pos): List[Char] = 
   389 def candidates(game: String, pos: Pos): List[Char] = 
   329   allValues.diff(toAvoid(game,pos))
   390   allValues.diff(toAvoid(game, pos))
   330 
   391 
   331 //candidates(game0, (0,0))
   392 //candidates(game0, (0,0))
   332 
   393 
   333 def pretty(game: String): String = 
   394 def pretty(game: String): String = 
   334   "\n" + (game sliding (MaxValue, MaxValue) mkString "\n")
   395   "\n" + (game.sliding(MaxValue, MaxValue).mkString("\n"))
   335 
   396 
   336 /////////////////////
   397 
   337 // not tail recursive 
       
   338 def search(game: String): List[String] = {
   398 def search(game: String): List[String] = {
   339   if (isDone(game)) List(game)
   399   if (isDone(game)) List(game)
   340   else {
   400   else {
   341     val cs = candidates(game, emptyPosition(game))
   401     val cs = candidates(game, emptyPosition(game))
   342     cs.map(c => search(update(game, empty(game), c))).toList.flatten
   402     cs.par.map(c => search(update(game, empty(game), c))).toList.flatten
   343   }
   403   }
   344 }
   404 }
   345 
   405 
   346 search(game0).map(pretty)
   406 search(game0).map(pretty)
   347 
   407 
   377               |.......7.
   437               |.......7.
   378               |.3.5...8.
   438               |.3.5...8.
   379               |9724...5.""".stripMargin.replaceAll("\\n", "")
   439               |9724...5.""".stripMargin.replaceAll("\\n", "")
   380 
   440 
   381 
   441 
   382 
       
   383 
       
   384 search(game1).map(pretty)
   442 search(game1).map(pretty)
   385 search(game3).map(pretty)
   443 search(game3).map(pretty)
   386 search(game2).map(pretty)
   444 search(game2).map(pretty)
   387 
   445 
   388 // for measuring time
   446 // for measuring time