725
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     1  | 
// A Version with an explicit n-times regular expression;
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     2  | 
// this keeps the size of the regular expression in the
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     3  | 
// EVIL1 test-case quite small
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     4  | 
//
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     5  | 
// call the test cases with X = {1,2}
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     6  | 
//
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     7  | 
//   amm re2.sc testX
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     8  | 
//
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
     9  | 
// or 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    10  | 
//
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    11  | 
//   amm re2.sc all
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    12  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    13  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    14  | 
abstract class Rexp 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    15  | 
case object ZERO extends Rexp
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    16  | 
case object ONE extends Rexp
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    17  | 
case class CHAR(c: Char) extends Rexp
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    18  | 
case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    19  | 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    20  | 
case class STAR(r: Rexp) extends Rexp 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    21  | 
case class NTIMES(r: Rexp, n: Int) extends Rexp   //explicit constructor for n-times
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    22  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    23  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    24  | 
def nullable (r: Rexp) : Boolean = r match {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    25  | 
  case ZERO => false
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    26  | 
  case ONE => true
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    27  | 
  case CHAR(_) => false
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    28  | 
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    29  | 
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    30  | 
  case STAR(_) => true
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    31  | 
  case NTIMES(r, i) => if (i == 0) true else nullable(r)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    32  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    33  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    34  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    35  | 
def der (c: Char, r: Rexp) : Rexp = r match {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    36  | 
  case ZERO => ZERO
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    37  | 
  case ONE => ZERO
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    38  | 
  case CHAR(d) => if (c == d) ONE else ZERO
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    39  | 
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    40  | 
  case SEQ(r1, r2) => 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    41  | 
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    42  | 
    else SEQ(der(c, r1), r2)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    43  | 
  case STAR(r1) => SEQ(der(c, r1), STAR(r1))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    44  | 
  case NTIMES(r1, i) => 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    45  | 
    if (i == 0) ZERO else SEQ(der(c, r1), NTIMES(r1, i - 1))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    46  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    47  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    48  | 
def ders (s: List[Char], r: Rexp) : Rexp = s match {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    49  | 
  case Nil => r
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    50  | 
  case c::s => ders(s, der(c, r))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    51  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    52  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    53  | 
def matcher(r: Rexp, s: String) : Boolean = 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    54  | 
  nullable(ders(s.toList, r))
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    55  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    56  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    57  | 
// the optional regular expression: one or zero times
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    58  | 
// this regular expression is still defined in terms of ALT
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    59  | 
def OPT(r: Rexp) = ALT(r, ONE)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    60  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    61  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    62  | 
// Test Cases
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    63  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    64  | 
// evil regular expressions
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    65  | 
def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    66  | 
val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    67  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    68  | 
def time_needed[T](i: Int, code: => T) = {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    69  | 
  val start = System.nanoTime()
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    70  | 
  for (j <- 1 to i) code
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    71  | 
  val end = System.nanoTime()
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    72  | 
  (end - start) / (i * 1.0e9)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    73  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    74  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    75  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    76  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    77  | 
@doc("Test (a?{n}) (a{n})")
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    78  | 
@main
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    79  | 
def test1() = {
 | 
| 
742
 | 
    80  | 
  println("Test (a?{n}) (a{n})")
 | 
| 
 | 
    81  | 
  | 
725
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    82  | 
  for (i <- 0 to 1000 by 100) {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    83  | 
    println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    84  | 
  }
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    85  | 
}  
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    86  | 
  | 
| 
742
 | 
    87  | 
  | 
725
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    88  | 
@doc("Test (a*)* b")
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    89  | 
@main
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    90  | 
def test2() = {
 | 
| 
742
 | 
    91  | 
  println("Test (a*)* b")
 | 
| 
 | 
    92  | 
  | 
| 
 | 
    93  | 
  for (i <- 0 to 20 by 2) {
 | 
725
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    94  | 
    println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    95  | 
  }
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    96  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    97  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    98  | 
// the size of a regular expressions - for testing purposes 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
    99  | 
def size(r: Rexp) : Int = r match {
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   100  | 
  case ZERO => 1
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   101  | 
  case ONE => 1
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   102  | 
  case CHAR(_) => 1
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   103  | 
  case ALT(r1, r2) => 1 + size(r1) + size(r2)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   104  | 
  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   105  | 
  case STAR(r) => 1 + size(r)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   106  | 
  case NTIMES(r, _) => 1 + size(r)
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   107  | 
}
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   108  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   109  | 
// EVIL1(n) has now a constant size, no matter
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   110  | 
// what n is; also the derivative only grows 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   111  | 
// moderately 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   112  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   113  | 
size(EVIL1(1))  // 7
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   114  | 
size(EVIL1(3))  // 7
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   115  | 
size(EVIL1(5))  // 7
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   116  | 
size(EVIL1(7))  // 7
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   117  | 
size(EVIL1(20)) // 7
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   118  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   119  | 
size(ders("".toList, EVIL1(5)))       // 7
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   120  | 
size(ders("a".toList, EVIL1(5)))      // 16
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   121  | 
size(ders("aa".toList, EVIL1(5)))     // 35
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   122  | 
size(ders("aaa".toList, EVIL1(5)))    // 59
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   123  | 
size(ders("aaaa".toList, EVIL1(5)))   // 88
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   124  | 
size(ders("aaaaa".toList, EVIL1(5)))  // 122
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   125  | 
size(ders("aaaaaa".toList, EVIL1(5))) // 151
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   126  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   127  | 
// but the size of the derivatives can still grow 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   128  | 
// quite dramatically in case of EVIL2
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   129  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   130  | 
size(ders("".toList, EVIL2))       // 5
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   131  | 
size(ders("a".toList, EVIL2))      // 12
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   132  | 
size(ders("aa".toList, EVIL2))     // 28
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   133  | 
size(ders("aaa".toList, EVIL2))    // 58
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   134  | 
size(ders("aaaa".toList, EVIL2))   // 116
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   135  | 
size(ders("aaaaa".toList, EVIL2))  // 230
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   136  | 
size(ders("aaaaaa".toList, EVIL2)) // 456
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   137  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   138  | 
size(ders(("a" * 20).toList, EVIL2)) // 7340068
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   139  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   140  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   141  | 
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   142  | 
@doc("All tests.")
 | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   143  | 
@main
  | 
Christian Urban <christian.urban@kcl.ac.uk> 
parents:  
diff
changeset
 
 | 
   144  | 
def all() = { test1(); test2() }  |