progs/matcher/re1.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 03 Oct 2025 10:10:33 +0100
changeset 998 69eddde11a65
parent 981 14e5ae1fb541
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     1
// A simple matcher for basic regular expressions
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     2
//
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
     3
// Call the testcases with X = {1,2,3}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     4
//
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     5
//   amm re1.sc testX
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     6
//
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     7
// or 
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     8
//
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     9
//   amm re1.sc all
826
b0352633bf48 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    10
//
b0352633bf48 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    11
998
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    12
981
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
    13
// regular expressions (as enum in Scala 3)
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
    14
enum Rexp {
  /** The empty language: matches no string at all. */
  case ZERO
  /** Matches exactly the empty string. */
  case ONE
  /** Matches the single character `c`. */
  case CHAR(c: Char)
  /** Alternative: matches whatever `r1` or `r2` matches. */
  case ALT(r1: Rexp, r2: Rexp)
  /** Sequence: a match of `r1` followed by a match of `r2`. */
  case SEQ(r1: Rexp, r2: Rexp)
  /** Star: zero or more repetitions of `r`. */
  case STAR(r: Rexp)
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
    22
import Rexp._
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    23
998
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    24
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    25
/* 
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    26
UPDATE: The videos and handouts still use the older syntax
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    27
with classes, which still works but is more verbose
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    28
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    29
abstract class Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    30
case object ZERO extends Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    31
case object ONE extends Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    32
case class CHAR(c: Char) extends Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    33
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    34
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    35
case class STAR(r: Rexp) extends Rexp
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    36
*/
69eddde11a65 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 981
diff changeset
    37
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    38
// nullable function: tests whether a regular 
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    39
// expression can recognise the empty string  
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    40
def nullable(r: Rexp) : Boolean = r match {
  // ONE and any star accept the empty string outright
  case ONE | STAR(_) => true
  // ZERO accepts nothing; a CHAR needs exactly one character
  case ZERO | CHAR(_) => false
  // an alternative is nullable if either branch is
  case ALT(left, right) => nullable(left) || nullable(right)
  // a sequence is nullable only if both parts are
  case SEQ(left, right) => nullable(left) && nullable(right)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    48
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    49
// the derivative of a regular expression w.r.t. a character
825
dca072e2bb7d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 769
diff changeset
    50
def der(c: Char, r: Rexp) : Rexp = r match {
  // neither ZERO nor ONE can consume a character
  case ZERO | ONE => ZERO
  case CHAR(d) if c == d => ONE
  case CHAR(_) => ZERO
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  case SEQ(first, rest) =>
    // c is consumed by the first component ...
    val viaFirst = SEQ(der(c, first), rest)
    // ... or, if the first component can be skipped, by the second one
    if (nullable(first)) ALT(viaFirst, der(c, rest)) else viaFirst
  // unfold one iteration of the star and consume c in it
  case STAR(body) => SEQ(der(c, body), STAR(body))
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    60
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    61
// the derivative w.r.t. a string (iterates der)
825
dca072e2bb7d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 769
diff changeset
    62
// Takes the derivative for each character of s in turn, left to right,
// starting from r; an empty s yields r unchanged.
def ders(s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r)((acc, ch) => der(ch, acc))
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    66
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    67
// the main matcher function
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    68
def matcher(r: Rexp, s: String) : Boolean = {
  // r matches s iff what is left of r after consuming all of s
  // can still match the empty string
  val residual = ders(s.toList, r)
  nullable(residual)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    70
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    71
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    72
// some examples from the homework
871
94b84d880c2b updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 826
diff changeset
    73
941
66adcae6c762 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 929
diff changeset
    74
val r = SEQ(CHAR('a'), CHAR('c'))
913
eef6a56c185a updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 882
diff changeset
    75
matcher(r, "ac")
871
94b84d880c2b updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 826
diff changeset
    76
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    77
val r1 = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    78
der('a', r)
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    79
der('b', r)
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    80
der('c', r)
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    81
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    82
val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    83
der('x', r2)
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    84
der('y', der('x', r2))
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    85
der('z', der('y', der('x', r2)))
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    86
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    87
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    88
// Test Cases
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    89
//============
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    90
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    91
// the optional regular expression (one or zero times)
765
b294cfbb5c01 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
    92
def OPT(r: Rexp) : Rexp = ALT(r, ONE)   // r + 1, i.e. r or the empty string
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    93
929
9541e073f2ed updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 928
diff changeset
    94
// the n-times regular expression (explicitly expanded to SEQs)
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    95
// Builds r{n} by explicit expansion into nested SEQs:
//   r{0} = ONE,   r{1} = r,   r{n} = SEQ(r, r{n-1})
// A negative count never reaches one of the base cases and used to
// recurse until the stack overflowed, so it is rejected up front
// with an IllegalArgumentException.
def NTIMES(r: Rexp, n: Int) : Rexp = {
  require(n >= 0, s"NTIMES needs a non-negative count, got $n")
  n match {
    case 0 => ONE
    case 1 => r
    case _ => SEQ(r, NTIMES(r, n - 1))
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   100
765
b294cfbb5c01 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
   101
// the evil regular expression  (a?){n} a{n}
b294cfbb5c01 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
   102
def EVIL1(n: Int) = {
  val optionalAs = NTIMES(OPT(CHAR('a')), n)   // (a?){n}
  val mandatoryAs = NTIMES(CHAR('a'), n)       // a{n}
  SEQ(optionalAs, mandatoryAs)
}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   104
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   105
// the evil regular expression (a*)* b
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   106
val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   107
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   108
// for measuring time
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   109
// Evaluates the by-name argument `code` i times and returns the
// average wall-clock time per evaluation in seconds.
def time_needed[T](i: Int, code: => T) = {
  val begin = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsedNanos = System.nanoTime() - begin
  // nanoseconds -> seconds, divided by the number of runs
  elapsedNanos / (i * 1.0e9)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   115
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   116
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   117
// test: (a?){n} a{n}
965
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   118
@main
def test1() = {
  // times EVIL1(i) against a string of i a's, for i = 0, 2, ..., 20
  println("Test (a?{n}) (a{n})")

  (0 to 20 by 2).foreach { i =>
    val secs = time_needed(2, matcher(EVIL1(i), "a" * i))
    println(f"$i: $secs%.5f")
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   126
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   127
// test: (a*)* b
965
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   128
@main
def test2() = {
  // times EVIL2 against a string of i a's, for i = 0, 2, ..., 20
  println("Test (a*)* b")

  (0 to 20 by 2).foreach { i =>
    val secs = time_needed(2, matcher(EVIL2, "a" * i))
    println(f"$i: $secs%.5f")
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   136
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   137
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   138
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   139
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   140
// the size of a regular expression - for testing purposes
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   141
def size(r: Rexp) : Int = r match {
  // every leaf counts as one node
  case ZERO | ONE | CHAR(_) => 1
  // inner nodes: one for the constructor plus the sizes of the children
  case STAR(inner) => 1 + size(inner)
  case ALT(left, right) => 1 + size(left) + size(right)
  case SEQ(left, right) => 1 + size(left) + size(right)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   149
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   150
// the explicit expansion in EVIL1(n) increases
769
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   151
// drastically its size - (a?){n} a{n}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   152
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   153
size(EVIL1(1))  // 5
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   154
size(EVIL1(3))  // 17
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   155
size(EVIL1(5))  // 29
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   156
size(EVIL1(7))  // 41
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   157
size(EVIL1(20)) // 119
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   158
769
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   159
size(ders(("a" * 20).toList, EVIL1(20))) 
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   160
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   161
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   162
// given a regular expression and building successive
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   163
// derivatives might result in bigger and bigger
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   164
// regular expressions...here is an example for this:
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   165
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   166
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   167
// (a + aa)*
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   168
val BIG = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   169
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   170
size(ders("".toList, BIG))              // 6
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   171
size(ders("aa".toList, BIG))            // 27
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   172
size(ders("aaaa".toList, BIG))          // 98
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   173
size(ders("aaaaaa".toList, BIG))        // 283
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   174
size(ders("aaaaaaaa".toList, BIG))      // 767
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   175
size(ders("aaaaaaaaaa".toList, BIG))    // 2034
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   176
size(ders("aaaaaaaaaaaa".toList, BIG))  // 5351
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   177
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   178
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   179
size(ders(("a" * 30).toList, BIG))      // 31010539
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   180
965
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   181
@main
def test3() = {
  // times BIG against a string of i a's, for i = 0, 5, ..., 30
  println("Test (a + aa)*")

  (0 to 30 by 5).foreach { i =>
    val secs = time_needed(2, matcher(BIG, "a" * i))
    println(f"$i: $secs%.5f")
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   189
981
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   190
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   191
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   192
// Some code for pretty printing regexes as trees
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   193
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   194
def implode(ss: Seq[String]) : String = ss.mkString("", "\n", "")  // join lines with newlines
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   195
def explode(s: String) : List[String] = { val pieces = s.split("\n"); pieces.toList }  // split into lines
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   196
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   197
// Renders s as the last child of a tree node: the first line gets the
// corner connector, all following lines are padded with blanks.
def lst(s: String) : String = explode(s) match {
  case Nil => ""
  case first :: rest =>
    val top = " └" ++ first
    val below = rest.map(line => "  " ++ line)
    implode(top :: below)
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   201
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   202
// Renders s as a non-last child of a tree node: the first line gets the
// branch connector, all following lines continue the vertical bar.
def mid(s: String) : String = explode(s) match {
  case Nil => ""
  case first :: rest =>
    val top = " ├" ++ first
    val below = rest.map(line => " │" ++ line)
    implode(top :: below)
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   206
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   207
// Lays out already-rendered children below their parent: every child
// but the last is drawn with mid, the last one with lst.
def indent(ss: Seq[String]) : String =
  if (ss.isEmpty) ""
  else implode(ss.init.map(mid) :+ lst(ss.last))
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   211
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   212
def pp(e: Rexp) : String = {
  // an inner node: its label on one line, the rendered children below it
  def node(label: String, kids: Rexp*) : String = label ++ "\n" ++ pps(kids*)

  e match {
    case ZERO => "0\n"
    case ONE => "1\n"
    case CHAR(c) => c.toString ++ "\n"
    case ALT(r1, r2) => node("ALT", r1, r2)
    case SEQ(r1, r2) => node("SEQ", r1, r2)
    case STAR(r) => node("STAR", r)
  }
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   220
def pps(es: Rexp*) : String = indent(es.map(e => pp(e)))  // render each child, then lay them out as a subtree
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   221
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   222
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   223
@main
def test4() = {
  // print r2 and its derivatives w.r.t. "x", "xy" and "xyz" as trees;
  // the derivative w.r.t. the empty string is r2 itself
  List("", "x", "xy", "xyz").foreach { prefix =>
    println(pp(ders(prefix.toList, r2)))
  }
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   230
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   231
@main
def all() = {
  // run every test in order
  test1()
  test2()
  test3()
  test4()
}
961
c0600f8b6427 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 941
diff changeset
   233