progs/re.scala
author Christian Urban <christian.urban@kcl.ac.uk>
Wed, 06 Mar 2024 18:20:25 +0000
changeset 484 c4561fc667b7
parent 68 8da9e0c16194
permissions -rw-r--r--
test
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
     1
// Part 1 about Regular Expression Matching
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
     2
//==========================================
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     3
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     4
abstract class Rexp
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
case object ZERO extends Rexp
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
case object ONE extends Rexp
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
case class CHAR(c: Char) extends Rexp
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
     8
case class ALT(r1: Rexp, r2: Rexp) extends Rexp   // alternative 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
     9
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp   // sequence
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    10
case class STAR(r: Rexp) extends Rexp             // star
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    11
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    12
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    13
// some convenience for typing in regular expressions
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    14
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    15
import scala.language.implicitConversions    
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    16
import scala.language.reflectiveCalls 
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    17
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    18
def charlist2rexp(s: List[Char]): Rexp = s match {
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    19
  case Nil => ONE
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    20
  case c::Nil => CHAR(c)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    21
  case c::s => SEQ(CHAR(c), charlist2rexp(s))
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    22
}
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    23
implicit def string2rexp(s: String): Rexp = charlist2rexp(s.toList)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    24
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    25
implicit def RexpOps (r: Rexp) = new {
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    26
  def | (s: Rexp) = ALT(r, s)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    27
  def % = STAR(r)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    28
  def ~ (s: Rexp) = SEQ(r, s)
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    29
}
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    30
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    31
implicit def stringOps (s: String) = new {
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    32
  def | (r: Rexp) = ALT(s, r)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    33
  def | (r: String) = ALT(s, r)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    34
  def % = STAR(s)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    35
  def ~ (r: Rexp) = SEQ(s, r)
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    36
  def ~ (r: String) = SEQ(s, r)
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    37
}
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    38
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    39
// (1a) Complete the function nullable according to
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    40
// the definition given in the coursework; this 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    41
// function checks whether a regular expression
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    42
// can match the empty string
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    43
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    44
def nullable (r: Rexp) : Boolean = ...
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    45
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    46
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    47
// (1b) Complete the function der according to
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    48
// the definition given in the coursework; this
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    49
// function calculates the derivative of a 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    50
// regular expression w.r.t. a character
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    51
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    52
def der (c: Char, r: Rexp) : Rexp = ...
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    53
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    54
// (1c) Complete the function der according to
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    55
// the specification given in the coursework; this
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    56
// function simplifies a regular expression;
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    57
// however it does not simplify inside STAR-regular
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    58
// expressions
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    59
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    60
def simp(r: Rexp) : Rexp = ... 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    61
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    62
// (1d) Complete the two functions below; the first 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    63
// calculates the derivative w.r.t. a string; the second
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    64
// is the regular expression matcher taking a regular
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    65
// expression and a string and checks whether the
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    66
// string matches the regular expression
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    67
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    68
def ders (s: List[Char], r: Rexp) : Rexp = ... 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    69
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    70
def matcher(r: Rexp, s: String): Boolean = ...
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    71
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    72
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    73
// (1e) Complete the function below: it searches (from the left to 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    74
// right) in string s1 all the non-empty substrings that match the 
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    75
// regular expression -- these substrings are assumed to be
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    76
// the longest substrings matched by the regular expression and
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    77
// assumed to be non-overlapping. All these substrings in s1 are replaced
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    78
// by s2.
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    79
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    80
def replace(r: Rexp, s1: String, s2: String): String = ...
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    81
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    82
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    83
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    84
// some testing data
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    85
// the supposedly 'evil' regular expression (a*)* b
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    86
/*
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    87
val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    88
println(matcher(EVIL, "a" * 1000 ++ "b"))
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    89
println(matcher(EVIL, "a" * 1000))
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    90
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    91
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    92
def time_needed[T](i: Int, code: => T) = {
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    93
  val start = System.nanoTime()
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    94
  for (j <- 1 to i) code
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    95
  val end = System.nanoTime()
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    96
  (end - start)/(i * 1.0e9)
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    97
}
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    98
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
    99
for (i <- 1 to 5000001 by 500000) {
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
   100
  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL, "a" * i))))
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   101
}
68
8da9e0c16194 updated
Christian Urban <urbanc@in.tum.de>
parents: 62
diff changeset
   102
*/
4
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   103
f31c22f4f104 updated
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   104