progs/matcher/re1.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 05 Sep 2025 16:59:48 +0100
changeset 981 14e5ae1fb541
parent 965 94f5cce73a4f
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     1
// A simple matcher for basic regular expressions
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     2
//
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
     3
// Call the testcases with X = {1,2,3,4}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     4
//
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     5
//   amm re1.sc testX
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     6
//
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     7
// or 
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     8
//
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     9
//   amm re1.sc all
826
b0352633bf48 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    10
//
b0352633bf48 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    11
745
7dc3643a0cc5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 725
diff changeset
    12
 
981
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
    13
// Abstract syntax of basic regular expressions, written as a
// Scala 3 enum. Every constructor below is a case of Rexp.
enum Rexp {
  // matches nothing
  case ZERO
  // matches an empty string
  case ONE
  // matches a character c
  case CHAR(c: Char)
  // alternative: r1 or r2
  case ALT(r1: Rexp, r2: Rexp)
  // sequence: r1 followed by r2
  case SEQ(r1: Rexp, r2: Rexp)
  // star of r
  case STAR(r: Rexp)
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
    22
import Rexp._
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    23
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    24
// nullable: decides whether a regular expression can
// recognise the empty string
def nullable(r: Rexp) : Boolean = r match {
  case ONE | STAR(_) => true
  case ZERO | CHAR(_) => false
  // an alternative is nullable if either branch is
  case ALT(left, right) => nullable(left) || nullable(right)
  // a sequence is nullable only if both parts are
  case SEQ(first, second) => nullable(first) && nullable(second)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    34
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    35
// the derivative of a regular expression r w.r.t. a character c
def der(c: Char, r: Rexp) : Rexp = r match {
  case ZERO | ONE => ZERO
  case CHAR(d) => if (c == d) ONE else ZERO
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  // if the first part is nullable, c may also be consumed by the second part
  case SEQ(first, second) if nullable(first) =>
    ALT(SEQ(der(c, first), second), der(c, second))
  case SEQ(first, second) => SEQ(der(c, first), second)
  case STAR(body) => SEQ(der(c, body), STAR(body))
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    46
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    47
// the derivative w.r.t. a string: applies der for each character
// of s, from left to right, starting from r
def ders(s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r) { (current, c) => der(c, current) }
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    52
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    53
// the main matcher function: r matches s exactly when the
// derivative of r w.r.t. all characters of s is nullable
def matcher(r: Rexp, s: String) : Boolean = {
  val residual = ders(s.toList, r)
  nullable(residual)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    56
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    57
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    58
// some examples from the homework

// r = a . c
val r = SEQ(CHAR('a'), CHAR('c'))
matcher(r, "ac")   // true

// r1 = (a . b + b)*
// NOTE(review): r1 is not used by the calls below, which take
// derivatives of r - confirm whether r1 was intended.
val r1 = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
der('a', r)        // SEQ(ONE, CHAR('c'))
der('b', r)        // SEQ(ZERO, CHAR('c'))
der('c', r)        // SEQ(ZERO, CHAR('c'))

// r2 = (x . y) . z, with successive derivatives w.r.t. x, y and z
val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
der('x', r2)       // SEQ(SEQ(ONE, CHAR('y')), CHAR('z'))
der('y', der('x', r2))
der('z', der('y', der('x', r2)))
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    72
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    73
919
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    74
// Test Cases
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    75
//============
53f08d873e09 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    76
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    77
// the optional regular expression r? (r one or zero times),
// encoded as the alternative r + 1
def OPT(r: Rexp) : Rexp = ALT(r, ONE)
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    79
929
9541e073f2ed updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 928
diff changeset
    80
// the n-times regular expression r{n} (explicitly expanded to SEQs)
//
//   n = 0 gives ONE, n = 1 gives r itself, and larger n gives
//   r . (r . (... r)) nested to the right.
//
// n must be non-negative: a negative n would never reach a base
// case, so it is rejected with an IllegalArgumentException rather
// than recursing until the stack overflows.
def NTIMES(r: Rexp, n: Int) : Rexp = {
  require(n >= 0, s"NTIMES: n must be non-negative, got $n")
  n match {
    case 0 => ONE
    case 1 => r
    case _ => SEQ(r, NTIMES(r, n - 1))
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    86
765
b294cfbb5c01 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
    87
// the evil regular expression  (a?){n} a{n}
def EVIL1(n: Int) : Rexp = {
  val optionalAs = NTIMES(OPT(CHAR('a')), n)   // (a?){n}
  val mandatoryAs = NTIMES(CHAR('a'), n)       // a{n}
  SEQ(optionalAs, mandatoryAs)
}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    90
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    91
// the evil regular expression (a*)* b
val EVIL2 : Rexp = {
  val nestedStars = STAR(STAR(CHAR('a')))   // (a*)*
  SEQ(nestedStars, CHAR('b'))
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    93
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    94
// for measuring time: evaluates the by-name argument `code` i times
// and returns the average wall-clock time per evaluation in seconds
// (System.nanoTime difference divided by i * 1.0e9).
//
// i must be positive: with i = 0 the division would be by 0.0, and
// with a negative i the code would never run while a negative time
// is reported, so both are rejected with an IllegalArgumentException.
def time_needed[T](i: Int, code: => T) : Double = {
  require(i > 0, s"time_needed: number of runs must be positive, got $i")
  val start = System.nanoTime()
  for (_ <- 1 to i) code
  val end = System.nanoTime()
  (end - start) / (i * 1.0e9)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   101
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   102
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   103
// test: (a?){n} a{n} matched against n a's, for n = 0, 2, ..., 20;
// prints the average time of two runs per n
@main
def test1() : Unit = {
  println("Test (a?{n}) (a{n})")

  (0 to 20 by 2).foreach { n =>
    val secs = time_needed(2, matcher(EVIL1(n), "a" * n))
    println(f"$n: $secs%.5f")
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   112
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   113
// test: (a*)* b matched against n a's, for n = 0, 2, ..., 20;
// prints the average time of two runs per n
@main
def test2() : Unit = {
  println("Test (a*)* b")

  (0 to 20 by 2).foreach { n =>
    val secs = time_needed(2, matcher(EVIL2, "a" * n))
    println(f"$n: $secs%.5f")
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   122
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   123
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   124
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   125
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   126
// the size of a regular expression - for testing purposes;
// counts every constructor in the expression tree
def size(r: Rexp) : Int = r match {
  case ZERO | ONE | CHAR(_) => 1
  case ALT(left, right) => 1 + size(left) + size(right)
  case SEQ(first, second) => 1 + size(first) + size(second)
  case STAR(body) => 1 + size(body)
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   135
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   136
// the explicit expansion in EVIL1(n) increases
769
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   137
// drastically its size - (a?){n} a{n}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   138
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   139
size(EVIL1(1))  // 5
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   140
size(EVIL1(3))  // 17
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   141
size(EVIL1(5))  // 29
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   142
size(EVIL1(7))  // 41
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   143
size(EVIL1(20)) // 119
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   144
769
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   145
size(ders(("a" * 20).toList, EVIL1(20))) 
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   146
f9686b22db7e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   147
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   148
// given a regular expression and building successive
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   149
// derivatives might result in bigger and bigger
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   150
// regular expressions...here is an example for this:
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   151
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   152
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   153
// (a + aa)*
val BIG : Rexp = {
  val a = CHAR('a')
  STAR(ALT(a, SEQ(a, a)))
}
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   155
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   156
size(ders("".toList, BIG))              // 6
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   157
size(ders("aa".toList, BIG))            // 27
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   158
size(ders("aaaa".toList, BIG))          // 112
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   159
size(ders("aaaaaa".toList, BIG))        // 191
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   160
size(ders("aaaaaaaa".toList, BIG))      // 288
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   161
size(ders("aaaaaaaaaa".toList, BIG))    // 403
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   162
size(ders("aaaaaaaaaaaa".toList, BIG))  // 536
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   163
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   164
928
2f3c077359c4 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   165
size(ders(("a" * 30).toList, BIG))      // 31010539
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   166
965
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   167
// test: (a + aa)* matched against n a's, for n = 0, 5, ..., 30;
// prints the average time of two runs per n
@main
def test3() : Unit = {
  println("Test (a + aa)*")

  (0 to 30 by 5).foreach { n =>
    val secs = time_needed(2, matcher(BIG, "a" * n))
    println(f"$n: $secs%.5f")
  }
}
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   175
981
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   176
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   177
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   178
// Some code for pretty printing regexes as trees
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   179
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   180
// joins the given strings into one, separated by newlines
def implode(ss: Seq[String]) : String = ss.mkString("", "\n", "")
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   181
// splits a string at newlines into a list of lines
// (String.split drops trailing empty strings)
def explode(s: String) : List[String] = {
  val lines = s.split("\n")
  lines.toList
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   182
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   183
// renders s as the last child of a tree node: the first line gets
// the corner connector, all following lines are indented by two spaces
def lst(s: String) : String = {
  val lines = explode(s)
  lines.headOption.fold("") { first =>
    implode(s" └$first" :: lines.tail.map(rest => s"  $rest"))
  }
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   187
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   188
// renders s as a non-last child of a tree node: the first line gets
// the branch connector, all following lines continue the vertical bar
def mid(s: String) : String = {
  val lines = explode(s)
  lines.headOption.fold("") { first =>
    implode(s" ├$first" :: lines.tail.map(rest => s" │$rest"))
  }
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   192
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   193
// lays out the rendered children of a node: every child but the
// last goes through mid, the last one through lst; no children
// give the empty string
def indent(ss: Seq[String]) : String =
  if (ss.isEmpty) ""
  else {
    val branches = ss.init.map(mid)
    implode(branches :+ lst(ss.last))
  }
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   197
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   198
// pretty-prints a regular expression as a tree: one line for the
// node label, followed by the indented rendering of its children
def pp(e: Rexp) : String = e match {
  case ZERO => "0\n"
  case ONE => "1\n"
  case CHAR(ch) => ch.toString + "\n"
  case ALT(left, right) => s"ALT\n${pps(left, right)}"
  case SEQ(first, second) => s"SEQ\n${pps(first, second)}"
  case STAR(body) => s"STAR\n${pps(body)}"
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   206
// pretty-prints several regular expressions as sibling subtrees
def pps(es: Rexp*) : String = {
  val rendered = es.map(e => pp(e))
  indent(rendered)
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   207
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   208
725
f345e89895f5 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   209
// prints r2 as a tree, followed by the trees of its derivatives
// w.r.t. "x", "xy" and "xyz" (the empty prefix leaves r2 unchanged)
@main
def test4() : Unit = {
  List("", "x", "xy", "xyz").foreach { prefix =>
    println(pp(ders(prefix.toList, r2)))
  }
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   216
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   217
// runs all four tests in order
@main
def all() : Unit = {
  test1()
  test2()
  test3()
  test4()
}
961
c0600f8b6427 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 941
diff changeset
   219
965
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   220
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   221
94f5cce73a4f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 961
diff changeset
   222
981
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   223
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   224
// partial derivatives produce a set of regular expressions
def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
  case ZERO | ONE => Set()
  case CHAR(d) => if (c == d) Set(ONE) else Set()
  case ALT(left, right) => pder(c, left) ++ pder(c, right)
  case SEQ(first, second) =>
    // c consumed by the first part, with the second part still to follow
    val viaFirst: Set[Rexp] = pder(c, first).map(p => SEQ(p, second))
    // c consumed by the second part - only possible if the first is nullable
    val viaSecond: Set[Rexp] = if (nullable(first)) pder(c, second) else Set()
    viaFirst ++ viaSecond
  case STAR(body) =>
    pder(c, body).map(p => SEQ(p, STAR(body)))
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   238
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   239
// partial derivatives w.r.t. a string: for each character of s, from
// left to right, replaces the current set by the union of the partial
// derivatives of its members
def pders(s: List[Char], rs: Set[Rexp]) : Set[Rexp] =
  s.foldLeft(rs) { (current, c) => current.flatMap(pder(c, _)) }
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   243
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   244
// partial derivatives of a single regular expression w.r.t. a string
def pders1(s: String, r: Rexp) : Set[Rexp] = {
  val start: Set[Rexp] = Set(r)
  pders(s.toList, start)
}
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   245
14e5ae1fb541 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 965
diff changeset
   246