progs/matcher/re1.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 03 Oct 2025 10:10:33 +0100
changeset 997 a4212e8bdcad
parent 980 4f422766763f
child 1005 970ddba0d72e
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     1
// A simple matcher for basic regular expressions
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     2
//
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
     3
// Call the testcases with X = {1,2,3,4}
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     4
//
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     5
//   amm re1.sc testX
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     6
//
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     7
// or 
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     8
//
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
     9
//   amm re1.sc all
826
e340b32c30a2 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    10
//
e340b32c30a2 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 825
diff changeset
    11
997
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    12
980
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
    13
// regular expressions (as enum in Scala 3)
enum Rexp {
  // matches nothing
  case ZERO
  // matches an empty string
  case ONE
  // matches a character c
  case CHAR(c: Char)
  // alternative
  case ALT(r1: Rexp, r2: Rexp)
  // sequence
  case SEQ(r1: Rexp, r2: Rexp)
  // star
  case STAR(r: Rexp)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
    22
import Rexp._
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    23
997
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    24
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    25
/* 
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    26
UPDATE: The videos and handouts still use the older syntax 
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    27
with classes, which still works but is more verbose
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    28
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    29
abstract class Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    30
case object ZERO extends Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    31
case object ONE extends Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    32
case class CHAR(c: Char) extends Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    33
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    34
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    35
case class STAR(r: Rexp) extends Rexp
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    36
*/
a4212e8bdcad updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 980
diff changeset
    37
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    38
// nullable: decides whether the regular expression r
// can match the empty string
def nullable(r: Rexp) : Boolean = r match {
  // ONE matches the empty string; a star is always nullable
  case ONE | STAR(_) => true
  // ZERO matches nothing; a character is never nullable
  case ZERO | CHAR(_) => false
  // one nullable branch suffices for an alternative
  case ALT(left, right) => nullable(left) || nullable(right)
  // both parts must be nullable for a sequence
  case SEQ(first, second) => nullable(first) && nullable(second)
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    48
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    49
// the derivative of a regular expression r w.r.t. a character c
def der(c: Char, r: Rexp) : Rexp = r match {
  case ZERO | ONE => ZERO
  case CHAR(d) if c == d => ONE
  case CHAR(_) => ZERO
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  case SEQ(first, second) =>
    // c is taken from the first component ...
    val viaFirst = SEQ(der(c, first), second)
    // ... or, when the first component is nullable, from the second
    if (nullable(first)) ALT(viaFirst, der(c, second)) else viaFirst
  case STAR(body) => SEQ(der(c, body), STAR(body))
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    60
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    61
// the derivative w.r.t. a string: applies der for each
// character of s from left to right, starting with r
def ders(s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r)((acc, c) => der(c, acc))
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    66
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    67
// the main matcher function: r matches s exactly when the
// derivative of r w.r.t. all characters of s is nullable
def matcher(r: Rexp, s: String) : Boolean = {
  val residual = ders(s.toList, r)
  nullable(residual)
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    70
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    71
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    72
// some examples from the homework
871
358a72d7bf71 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 826
diff changeset
    73
940
1c1fbf45a03c updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 928
diff changeset
    74
val r = SEQ(CHAR('a'), CHAR('c'))
913
8f07908dec8d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 882
diff changeset
    75
matcher(r, "ac")
871
358a72d7bf71 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 826
diff changeset
    76
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    77
// NOTE(review): r1 is not referenced in the visible part of this file;
// the der calls that follow take r, not r1 -- confirm which was intended
val r1 = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    78
der('a', r)
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    79
der('b', r)
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    80
der('c', r)
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    81
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    82
val r2 = SEQ(SEQ(CHAR('x'), CHAR('y')), CHAR('z'))
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    83
der('x', r2)
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    84
der('y', der('x', r2))
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    85
der('z', der('y', der('x', r2)))
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    86
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    87
919
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    88
// Test Cases
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    89
//============
d16037caa8fd updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 913
diff changeset
    90
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    91
// the optional regular expression (one or zero times),
// encoded as the alternative r + 1
def OPT(r: Rexp) : Rexp = ALT(r, ONE)
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
    93
928
717ecab7b87a updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 927
diff changeset
    94
// the n-times regular expression (explicitly expanded to SEQs)
//
//   r - the regular expression to repeat
//   n - number of repetitions; must not be negative
//
// returns ONE for n == 0, r itself for n == 1 and otherwise a
// right-nested chain SEQ(r, SEQ(r, ...)) containing n copies of r
def NTIMES(r: Rexp, n: Int) : Rexp = {
  // a negative count would never reach a base case and
  // recurse until the stack overflows
  require(n >= 0, s"NTIMES needs a non-negative count, got $n")
  n match {
    case 0 => ONE
    case 1 => r
    case _ => SEQ(r, NTIMES(r, n - 1))
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   100
765
b66602e0b42d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 759
diff changeset
   101
// the evil regular expression  (a?){n} a{n}
def EVIL1(n: Int) = {
  val optionalAs = NTIMES(OPT(CHAR('a')), n)   // (a?){n}
  val mandatoryAs = NTIMES(CHAR('a'), n)       // a{n}
  SEQ(optionalAs, mandatoryAs)
}
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   104
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   105
// the evil regular expression (a*)* b
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   106
val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))   // timed by test2 on strings of a's
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   107
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   108
// for measuring time: evaluates code i times and returns the
// average wall-clock time of a single evaluation in seconds
//
//   i    - number of repetitions; must be positive
//   code - by-name argument, re-evaluated in every repetition
def time_needed[T](i: Int, code: => T) : Double = {
  // i == 0 would divide by zero (Infinity/NaN) and a negative i
  // would report a meaningless negative average
  require(i > 0, s"time_needed needs a positive repetition count, got $i")
  val start = System.nanoTime()
  for (_ <- 1 to i) code
  val end = System.nanoTime()
  // nanoseconds -> seconds, divided by the number of runs
  (end - start) / (i * 1.0e9)
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   115
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   116
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   117
// test: (a?){n} a{n} matched against n a's, for n = 0, 2, ..., 20;
// prints the average time of two runs per n
@main
def test1() = {
  println("Test (a?{n}) (a{n})")

  (0 to 20 by 2).foreach { i =>
    val secs = time_needed(2, matcher(EVIL1(i), "a" * i))
    println(f"$i: $secs%.5f")
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   126
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   127
// test: (a*)* b matched against n a's, for n = 0, 2, ..., 20;
// prints the average time of two runs per n
@main
def test2() = {
  println("Test (a*)* b")

  (0 to 20 by 2).foreach { i =>
    val secs = time_needed(2, matcher(EVIL2, "a" * i))
    println(f"$i: $secs%.5f")
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   136
927
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   137
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   138
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   139
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   140
// the size of a regular expression (number of nodes in its
// syntax tree) - for testing purposes
def size(r: Rexp) : Int = r match {
  // leaves count as one node
  case ZERO | ONE | CHAR(_) => 1
  // inner nodes: one for the constructor plus the sub-expressions
  case ALT(left, right) => 1 + size(left) + size(right)
  case SEQ(first, second) => 1 + size(first) + size(second)
  case STAR(inner) => 1 + size(inner)
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   149
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   150
// the explicit expansion in EVIL1(n) increases
769
b153de5339bc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   151
// drastically its size - (a?){n} a{n}
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   152
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   153
size(EVIL1(1))  // 5
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   154
size(EVIL1(3))  // 17
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   155
size(EVIL1(5))  // 29
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   156
size(EVIL1(7))  // 41
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   157
size(EVIL1(20)) // 119
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   158
769
b153de5339bc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   159
size(ders(("a" * 20).toList, EVIL1(20))) 
b153de5339bc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   160
b153de5339bc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 765
diff changeset
   161
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   162
// given a regular expression and building successive
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   163
// derivatives might result in bigger and bigger
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   164
// regular expressions...here is an example for this:
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   165
927
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   166
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   167
// (a + aa)*
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   168
val BIG = STAR(ALT(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))))   // used by the size experiments and by test3
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   169
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   170
size(ders("".toList, BIG))              // 6
927
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   171
size(ders("aa".toList, BIG))            // 27
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   172
size(ders("aaaa".toList, BIG))          // 98
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   173
size(ders("aaaaaa".toList, BIG))        // 283
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   174
size(ders("aaaaaaaa".toList, BIG))      // 767
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   175
size(ders("aaaaaaaaaa".toList, BIG))    // 2034
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   176
size(ders("aaaaaaaaaaaa".toList, BIG))  // 5351
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   177
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   178
927
6bfda4b90702 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 919
diff changeset
   179
size(ders(("a" * 30).toList, BIG))      // 31010539
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   180
964
d3e22099963d updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 960
diff changeset
   181
// test: (a + aa)* matched against n a's, for n = 0, 5, ..., 30;
// prints the average time of two runs per n
@main
def test3() = {
  println("Test (a + aa)*")

  (0 to 30 by 5).foreach { i =>
    val secs = time_needed(2, matcher(BIG, "a" * i))
    println(f"$i: $secs%.5f")
  }
}
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   189
980
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   190
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   191
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   192
// Some code for pretty printing regexes as trees
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   193
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   194
// joins the given lines into one string, separated by newlines
def implode(ss: Seq[String]) : String = ss.mkString("", "\n", "")
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   195
// splits a string at newlines into its lines
// (String.split drops trailing empty pieces, so "0\n" gives List("0"))
def explode(s: String) : List[String] = {
  val pieces = s.split("\n")
  pieces.toList
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   196
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   197
// renders the last child of a tree node: the first line gets the
// corner marker, all following lines are padded with two blanks
def lst(s: String) : String = explode(s) match {
  case Nil => ""
  case first :: rest =>
    val marked = " └" + first
    val padded = rest.map(line => "  " + line)
    implode(marked :: padded)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   201
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   202
// renders a non-last child of a tree node: the first line gets the
// branch marker, all following lines continue the vertical bar
def mid(s: String) : String = explode(s) match {
  case Nil => ""
  case first :: rest =>
    val marked = " ├" + first
    val barred = rest.map(line => " │" + line)
    implode(marked :: barred)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   206
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   207
// lays out the rendered children of a node: every child but the
// last is drawn with mid, the last one with lst; no children
// give the empty string
def indent(ss: Seq[String]) : String =
  if (ss.isEmpty) ""
  else implode(ss.init.map(mid) :+ lst(ss.last))
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   211
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   212
// pretty-prints a regular expression as a tree: leaves are printed
// as a single line, inner nodes as their constructor name followed
// by the indented rendering of their children
def pp(e: Rexp) : String = e match {
  case ZERO => "0\n"
  case ONE => "1\n"
  case CHAR(c) => s"$c\n"
  case ALT(left, right) => "ALT\n" + pps(left, right)
  case SEQ(first, second) => "SEQ\n" + pps(first, second)
  case STAR(body) => "STAR\n" + pps(body)
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   220
// pretty-prints each given regular expression and arranges the
// results as the children of one tree node
def pps(es: Rexp*) : String = indent(es.map(e => pp(e)))
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   221
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   222
725
cd72ba78c287 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff changeset
   223
// prints r2 and its derivatives w.r.t. "x", "xy" and "xyz" as trees
// (the derivative w.r.t. the empty string is r2 itself)
@main
def test4() = {
  List("", "x", "xy", "xyz").foreach { prefix =>
    println(pp(ders(prefix.toList, r2)))
  }
}
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   230
4f422766763f updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 964
diff changeset
   231
// runs all four tests one after the other
@main
def all() = {
  test1()
  test2()
  test3()
  test4()
}
960
791f4d9f53e1 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 940
diff changeset
   233