progs/matcher/re1.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 05 Sep 2025 16:59:48 +0100
changeset 980 4f422766763f
parent 964 d3e22099963d
child 997 a4212e8bdcad
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     1
// A simple matcher for basic regular expressions
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     2
//
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
     3
// Call the testcases with X = {1,2,3,4}
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     4
//
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     5
//   amm re1.sc testX
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     6
//
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     7
// or 
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     8
//
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     9
//   amm re1.sc all
826
e340b32c30a2 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    10
//
e340b32c30a2 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    11
745
905b60a029bf updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 725
diff changeset
    12
 
980
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
    13
// regular expressions (as enum in Scala 3);
// each case is one syntactic form of a basic regular expression
enum Rexp {
  case ZERO                     // matches nothing
  case ONE                      // matches the empty string
  case CHAR(c: Char)            // matches a character c
  case ALT(r1: Rexp, r2: Rexp)  // alternative
  case SEQ(r1: Rexp, r2: Rexp)  // sequence
  case STAR(r: Rexp)            // star
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
    22
import Rexp._
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    23
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    24
// nullable function: tests whether a regular
// expression can recognise the empty string
def nullable(r: Rexp) : Boolean = r match {
  case ZERO => false
  case ONE => true
  case CHAR(_) => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)   // one nullable branch suffices
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)   // both parts must accept ""
  case STAR(_) => true                               // zero iterations
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    34
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    35
// the derivative of a regular expression w.r.t. a character:
// der(c, r) describes what is left of r after the character c
// has been consumed
def der(c: Char, r: Rexp) : Rexp = r match {
  case ZERO => ZERO
  case ONE => ZERO
  case CHAR(d) => if (c == d) ONE else ZERO
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  case SEQ(r1, r2) =>
    // if r1 can match the empty string, c may also be consumed by r2
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    else SEQ(der(c, r1), r2)
  case STAR(r1) => SEQ(der(c, r1), STAR(r1))   // unfold one iteration of the star
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    46
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    47
// the derivative w.r.t. a string (iterates der
// over the characters from left to right)
def ders(s: List[Char], r: Rexp) : Rexp = s match {
  case Nil => r
  // the tail is called cs so that it does not shadow the parameter s
  case c::cs => ders(cs, der(c, r))
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    52
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    53
// the main matcher function: r matches s iff the derivative
// of r w.r.t. the whole string s can recognise the empty string
def matcher(r: Rexp, s: String) : Boolean =
  nullable(ders(s.toList, r))
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    56
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    57
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    58
// some examples from the homework

val r = SEQ(CHAR('a'), CHAR('c'))
matcher(r, "ac")   // true

// NOTE(review): r1 is defined here, but the derivatives below
// are taken of r - confirm whether r1 was intended
val r1 = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
der('a', r)   // SEQ(ONE, CHAR('c'))
der('b', r)   // SEQ(ZERO, CHAR('c'))
der('c', r)   // SEQ(ZERO, CHAR('c'))

val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
der('x', r2)                       // SEQ(SEQ(ONE, CHAR('y')), CHAR('z'))
der('y', der('x', r2))             // SEQ(ALT(SEQ(ZERO, CHAR('y')), ONE), CHAR('z'))
der('z', der('y', der('x', r2)))   // nullable, i.e. r2 matches "xyz"
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    72
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    73
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    74
// Test Cases
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    75
//============
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    76
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    77
// the optional regular expression (one or zero times)
765
b66602e0b42d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
    78
def OPT(r: Rexp) : Rexp = ALT(r1 = r, r2 = ONE)   // r + 1, i.e. r or the empty string
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    79
928
717ecab7b87a updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 927
diff changeset
    80
// the n-times regular expression (explicitly expanded to SEQs);
// NTIMES(r, 0) is ONE, NTIMES(r, 3) is SEQ(r, SEQ(r, r))
def NTIMES(r: Rexp, n: Int) : Rexp = {
  // a negative n would never reach the base cases below and
  // recurse until the stack overflows
  require(n >= 0, s"NTIMES: n must be non-negative, got $n")
  n match {
    case 0 => ONE
    case 1 => r
    case n => SEQ(r, NTIMES(r, n - 1))
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    86
765
b66602e0b42d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
    87
// the evil regular expression  (a?){n} a{n}
// (n optional a's followed by n mandatory a's)
def EVIL1(n: Int) : Rexp =
  SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    90
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    91
// the evil regular expression (a*)* b
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    92
val EVIL2 = {
  // a star nested directly inside another star, followed by a single b
  val nestedStars = STAR(STAR(CHAR('a')))
  SEQ(nestedStars, CHAR('b'))
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    93
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    94
// for measuring time: evaluates code i times and returns
// the average time per evaluation in seconds
def time_needed[T](i: Int, code: => T) : Double = {
  // with i <= 0 the loop below never runs and the average is meaningless
  require(i > 0, s"time_needed: number of runs must be positive, got $i")
  val start = System.nanoTime()
  for (_ <- 1 to i) code   // code is by-name, so it is re-evaluated on every pass
  val end = System.nanoTime()
  (end - start) / (i * 1.0e9)   // nanoseconds -> seconds, averaged over i runs
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   101
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   102
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   103
// test: (a?){n} a{n}
// prints, for n = 0, 2, ..., 20, the average time for matching
// EVIL1(n) against the string consisting of n a's
@main
def test1() = {
  println("Test (a?{n}) (a{n})")

  for (i <- 0 to 20 by 2) {
    println(f"$i: ${time_needed(2, matcher(EVIL1(i), "a" * i))}%.5f")
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   112
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   113
// test: (a*)* b
// prints, for n = 0, 2, ..., 20, the average time for matching
// EVIL2 against the string consisting of n a's
@main
def test2() = {
  println("Test (a*)* b")

  for (i <- 0 to 20 by 2) {
    println(f"$i: ${time_needed(2, matcher(EVIL2, "a" * i))}%.5f")
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   122
927
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   123
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   124
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   125
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   126
// the size of a regular expression - for testing purposes;
// counts the constructors (nodes) of the regular expression tree
def size(r: Rexp) : Int = r match {
  case ZERO => 1
  case ONE => 1
  case CHAR(_) => 1
  case ALT(r1, r2) => 1 + size(r1) + size(r2)
  case SEQ(r1, r2) => 1 + size(r1) + size(r2)
  case STAR(r) => 1 + size(r)
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   135
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   136
// the explicit expansion in EVIL1(n) increases
// drastically its size - (a?){n} a{n}

size(EVIL1(1))  // 5
size(EVIL1(3))  // 17
size(EVIL1(5))  // 29
size(EVIL1(7))  // 41
size(EVIL1(20)) // 119

size(ders(("a" * 20).toList, EVIL1(20)))
b153de5339bc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   146
b153de5339bc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   147
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   148
// given a regular expression and building successive
// derivatives might result in bigger and bigger
// regular expressions...here is an example for this:

// (a + aa)*
val BIG = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))

// sizes of the (unsimplified) derivatives w.r.t. n a's; for n >= 3
// they follow s(n) = s(n-1) + s(n-2) + 16, i.e. Fibonacci-like growth
size(ders("".toList, BIG))              // 6
size(ders("aa".toList, BIG))            // 27
size(ders("aaaa".toList, BIG))          // 98
size(ders("aaaaaa".toList, BIG))        // 283
size(ders("aaaaaaaa".toList, BIG))      // 767
size(ders("aaaaaaaaaa".toList, BIG))    // 2034
size(ders("aaaaaaaaaaaa".toList, BIG))  // 5351


size(ders(("a" * 30).toList, BIG))      // 31010539
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   166
964
d3e22099963d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 960
diff changeset
   167
// test: (a + aa)*
// prints, for n = 0, 5, ..., 30, the average time for matching
// BIG against the string consisting of n a's
@main
def test3() = {
  println("Test (a + aa)*")

  for (i <- 0 to 30 by 5) {
    println(f"$i: ${time_needed(2, matcher(BIG, "a" * i))}%.5f")
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   175
980
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   176
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   177
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   178
// Some code for pretty printing regexes as trees
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   179
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   180
// glues the given lines together with newlines (no trailing newline)
def implode(ss: Seq[String]) : String = ss.mkString("", "\n", "")
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   181
// breaks a string into its lines; trailing empty lines are dropped by split
def explode(s: String) : List[String] = {
  val lines: Array[String] = s.split("\n")
  lines.toList
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   182
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   183
// formats the subtree s as the last child of a node: the first
// line gets the └ connector, all further lines are indented by
// two spaces
def lst(s: String) : String = explode(s) match {
  case hd :: tl => implode(" └" ++ hd :: tl.map("  " ++ _))
  case Nil => ""
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   187
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   188
// formats the subtree s as a non-last child of a node: the first
// line gets the ├ connector, all further lines continue the
// vertical bar │
def mid(s: String) : String = explode(s) match {
  case hd :: tl => implode(" ├" ++ hd :: tl.map(" │" ++ _))
  case Nil => ""
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   192
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   193
// lays out the already printed children of a node: every child
// but the last is formatted with mid, the last one with lst;
// an empty sequence of children gives the empty string
def indent(ss: Seq[String]) : String = ss match {
  case init :+ last => implode(init.map(mid) :+ lst(last))
  case _ => ""
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   197
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   198
// pretty prints a regular expression as a tree: a line with the
// node label, followed by the indented subtrees (via pps)
def pp(e: Rexp) : String = e match {
  case ZERO => "0\n"
  case ONE => "1\n"
  case CHAR(c) => s"$c\n"
  case ALT(r1, r2) => "ALT\n" ++ pps(r1, r2)
  case SEQ(r1, r2) => "SEQ\n" ++ pps(r1, r2)
  case STAR(r) => "STAR\n" ++ pps(r)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   206
// pretty prints each of the given subtrees and lays them out as children
def pps(es: Rexp*) = {
  val printed = es.map(pp)
  indent(printed)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   207
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   208
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   209
// test: prints r2 and its derivatives w.r.t. "x", "xy" and "xyz" as trees
@main
def test4() = {
  println(pp(r2))
  println(pp(ders("x".toList, r2)))
  println(pp(ders("xy".toList, r2)))
  println(pp(ders("xyz".toList, r2)))
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   216
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   217
// runs all test cases one after the other
@main
def all() = { test1(); test2(); test3(); test4() }
960
791f4d9f53e1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 940
diff changeset
   219
964
d3e22099963d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 960
diff changeset
   220
d3e22099963d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 960
diff changeset
   221
d3e22099963d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 960
diff changeset
   222
980
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   223
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   224
// partial derivatives produce a set of regular expressions
// (instead of a single regular expression built with ALT)
def pder(c: Char, r: Rexp) : Set[Rexp] = r match {
  case ZERO => Set()
  case ONE => Set()
  case CHAR(d) => if (c == d) Set(ONE) else Set()
  case ALT(r1, r2) => pder(c, r1) ++ pder(c, r2)
  case SEQ(r1, r2) => {
    // c is consumed by r1 ...
    (for (pr1 <- pder(c, r1)) yield SEQ(pr1, r2)) ++
    // ... or, if r1 can match the empty string, by r2
    (if (nullable(r1)) pder(c, r2) else Set())
  }
  case STAR(r1) => {
    // unfold one iteration of the star
    for (pr1 <- pder(c, r1)) yield SEQ(pr1, STAR(r1))
  }
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   238
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   239
// partial derivatives of a set of regular expressions w.r.t. a
// string (iterates pder over the characters from left to right)
def pders(s: List[Char], rs: Set[Rexp]) : Set[Rexp] = s match {
  case Nil => rs
  // the tail is called cs so that it does not shadow the parameter s
  case c::cs => pders(cs, rs.flatMap(pder(c, _)))
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   243
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   244
// partial derivatives of a single regular expression w.r.t. a string
def pders1(s: String, r: Rexp) = {
  val start = Set(r)
  pders(s.toList, start)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   245
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   246