solution/cw1/matcher.sc
changeset 894 4d5058706f1b
parent 893 908e4f6cdf7c
child 895 550676e542d2
--- a/solution/cw1/matcher.sc	Fri Oct 28 09:08:13 2022 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,325 +0,0 @@
-// CW1
-
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
-case class STAR(r: Rexp) extends Rexp 
-
-// extended Rexps
-case class RANGE(s: Set[Char]) extends Rexp
-case class PLUS(r: Rexp) extends Rexp
-case class OPTIONAL(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp 
-case class UPTO(r: Rexp, n: Int) extends Rexp
-case class FROM(r: Rexp, n: Int) extends Rexp
-case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-
-// functions
-case class CFUN(f: Char => Boolean) extends Rexp
-
-// abbreviations
-def FCHAR(c: Char) = CFUN((a: Char) => a == c)
-def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a))
-val FALL = CFUN(_ => true)
-
-def nullable (r: Rexp) : Boolean = r match {
-  case ZERO => false
-  case ONE => true
-  case CHAR(_) => false
-  case ALT(r1, r2) => nullable(r1) || nullable(r2)
-  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
-  case STAR(_) => true
-
-  case RANGE(_) => false 
-  case PLUS(r1) => nullable(r1)
-  case OPTIONAL(_) => true
-  case NTIMES(r1, i) => if (i == 0) true else nullable(r1)
-  case UPTO(_, _) => true
-  case FROM(r1, i) => if (i == 0) true else nullable(r1)
-  case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1)
-  case NOT(r1) => !nullable(r1)
-
-  case CFUN(f) => false
-}
-
-
-def der (c: Char, r: Rexp) : Rexp = r match {
-  case ZERO => ZERO
-  case ONE => ZERO
-  case CHAR(d) => if (c == d) ONE else ZERO
-  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
-  case SEQ(r1, r2) => 
-    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
-    else SEQ(der(c, r1), r2)
-  case STAR(r1) => SEQ(der(c, r1), STAR(r1))
-
-  case RANGE(s) =>
-    if (s.contains(c)) ONE else ZERO 
-  case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
-  case OPTIONAL(r1) => der(c, r1)
-  case NTIMES(r, i) => 
-    if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
-  case UPTO(r1, i) => 
-    if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1)) 
-  case FROM(r1, i) =>
-    if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else
-    SEQ(der(c, r1), FROM(r1, i - 1))
-  case BETWEEN(r1, i, j) => 
-    if (i == 0) {
-      if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1))
-    } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1))
-  case NOT(r1) => NOT(der(c, r1))
-
-  case CFUN(f) => if (f(c)) ONE else ZERO
-}
-
-
-def simp(r: Rexp) : Rexp = r match {
-  case ALT(r1, r2) => (simp(r1), simp(r2)) match {
-    case (ZERO, r2s) => r2s
-    case (r1s, ZERO) => r1s
-    case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
-  }
-  case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
-    case (ZERO, _) => ZERO
-    case (_, ZERO) => ZERO
-    case (ONE, r2s) => r2s
-    case (r1s, ONE) => r1s
-    case (r1s, r2s) => SEQ(r1s, r2s)
-  }
-  case r => r
-}
-
-def ders(s: List[Char], r: Rexp) : Rexp = s match {
-  case Nil => r
-  case c::s => ders(s, simp(der(c, r)))
-}
-
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-
-val Regex31 = NTIMES(CHAR('a'),3)
-val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3)
-val Regex33 = UPTO(CHAR('a'),3)
-val Regex34 = UPTO(OPTIONAL(CHAR('a')),3)
-val Regex35 = BETWEEN(CHAR('a'),3,5)
-val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5)
-val string31 = ""
-val string32 = "a"
-val string33 = "aa"
-val string34 = "aaa"
-val string35 = "aaaa"
-val string36 = "aaaaa"
-val string37 = "aaaaaa"
-
-
-println("Question3")
-println(matcher(Regex31, string31))
-println(matcher(Regex31, string32))
-println(matcher(Regex31, string33))
-println(matcher(Regex31, string34))
-println(matcher(Regex31, string35))
-println(matcher(Regex31, string36))
-println(matcher(Regex31, string37)); println("")
-println(matcher(Regex32, string31))
-println(matcher(Regex32, string32))
-println(matcher(Regex32, string33))
-println(matcher(Regex32, string34))
-println(matcher(Regex32, string35))
-println(matcher(Regex32, string36))
-println(matcher(Regex32, string37)); println("")
-println(matcher(Regex33, string31))
-println(matcher(Regex33, string32))
-println(matcher(Regex33, string33))
-println(matcher(Regex33, string34))
-println(matcher(Regex33, string35))
-println(matcher(Regex33, string36))
-println(matcher(Regex33, string37)); println("")
-println(matcher(Regex34, string31))
-println(matcher(Regex34, string32))
-println(matcher(Regex34, string33))
-println(matcher(Regex34, string34))
-println(matcher(Regex34, string35))
-println(matcher(Regex34, string36))
-println(matcher(Regex34, string37)); println("")
-println(matcher(Regex35, string31))
-println(matcher(Regex35, string32))
-println(matcher(Regex35, string33))
-println(matcher(Regex35, string34))
-println(matcher(Regex35, string35))
-println(matcher(Regex35, string36))
-println(matcher(Regex35, string37)); println("")
-println(matcher(Regex36, string31))
-println(matcher(Regex36, string32))
-println(matcher(Regex36, string33))
-println(matcher(Regex36, string34))
-println(matcher(Regex36, string35))
-println(matcher(Regex36, string36))
-println(matcher(Regex36, string37)); println("")
-
-
-val RegexX = BETWEEN(CHAR('a'), 0, 5)
-val stringX = ""
-println(matcher(RegexX, stringX))
-
-
-
-val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-
-val matchA = (c:Char) => c == 'a'
-
-val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA))
-val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA)))
-
-val reg_1_mod = PLUS(PLUS(reg_1))
-val reg_2_mod = PLUS(PLUS(reg_2))
-
-
-matcher(reg_1_mod, str0)
-matcher(reg_1_mod, str1)
-matcher(reg_1_mod, str2)
-matcher(reg_2_mod, str0)
-matcher(reg_2_mod, str1)
-matcher(reg_2_mod, str2)
-
-
-
-
-
-//Q3: matcher test (table)
-
-// a^{3}
-val re1 = NTIMES(CHAR('a'), 3)
-
-matcher(re1, "")        //false
-matcher(re1, "a")       //false
-matcher(re1, "aa")      //false
-matcher(re1, "aaa")     //true
-matcher(re1, "aaaaa")   //false
-matcher(re1, "aaaaaa")  //false
-
-// (a?)^{3}
-val re2 = NTIMES(OPTIONAL(CHAR('a')), 3)
-
-matcher(re2, "")        //true
-matcher(re2, "a")       //true
-matcher(re2, "aa")      //true
-matcher(re2, "aaa")     //true
-matcher(re2, "aaaaa")   //false
-matcher(re2, "aaaaaa")  //false
-
-// (a)^{..3}
-val re3 = UPTO((CHAR('a')), 3)
-
-matcher(re3, "")        //true
-matcher(re3, "a")       //true
-matcher(re3, "aa")      //true
-matcher(re3, "aaa")     //true
-matcher(re3, "aaaaa")   //false
-matcher(re3, "aaaaaa")  //false
-
-// (a?)^{..3}
-val re4 = UPTO(OPTIONAL(CHAR('a')), 3)
-
-matcher(re4, "")        //true
-matcher(re4, "a")       //true
-matcher(re4, "aa")      //true
-matcher(re4, "aaa")     //true
-matcher(re4, "aaaaa")   //false
-matcher(re4, "aaaaaa")  //false
-
-// (a)^{3..5}
-val re5 = BETWEEN(CHAR('a'), 3, 5)
-
-matcher(re5, "")        //false
-matcher(re5, "a")       //false
-matcher(re5, "aa")      //false
-matcher(re5, "aaa")     //true
-matcher(re5, "aaaaa")   //true
-matcher(re5, "aaaaaa")  //false
-
-// (a?)^{3..5}
-val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5)
-
-matcher(re6, "")        //true
-matcher(re6, "a")       //true
-matcher(re6, "aa")      //true
-matcher(re6, "aaa")     //true
-matcher(re6, "aaaaa")   //true
-matcher(re6, "aaaaaa")  //false
-
-//Q5: regular expression for email addresses
-
-val alphaNum = ('a' to 'z').toSet ++ ('0' to '9')
-val Q5email = SEQ(
-                PLUS(RANGE(alphaNum + '_' + '-' + '.')), 
-                SEQ(
-                    CHAR('@'), 
-                    SEQ(
-                        PLUS(RANGE(alphaNum + '-' + '.')), 
-                        SEQ(
-                            CHAR('.'), 
-                            BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6)
-                            )
-                      )
-                )
-              )
-
-ders("nicolas.volken@kcl.ac.uk".toList, Q5email)
-
-// Q6
-val Q6 = SEQ(CHAR('/'),
-            SEQ(CHAR('*'),
-                SEQ(
-                  
-                    NOT(
-                      SEQ(
-                        STAR(FALL),
-                        SEQ(
-                          CHAR('*'),
-                          SEQ(
-                            CHAR('/'),
-                            STAR(FALL)
-                          )
-                        )
-                      )
-                    )
-                  
-                    ,
-                    SEQ(CHAR('*'),
-                    CHAR('/')
-                    )
-                )
-            )
-        )
-
-matcher(Q6, "/**/")
-matcher(Q6, "/*foobar*/")
-matcher(Q6, "/*test*/test*/")
-matcher(Q6, "/*test/*test*/")
-
-// Q7
-
-val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
-val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a')))
-
-val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-
-matcher(PLUS(PLUS(Q7r1)), Q7str5)
-matcher(PLUS(PLUS(Q7r2)), Q7str5)
-
-matcher(PLUS(PLUS(Q7r1)), Q7str6)
-matcher(PLUS(PLUS(Q7r2)), Q7str6)
-
-matcher(PLUS(PLUS(Q7r1)), Q7str7)
-matcher(PLUS(PLUS(Q7r2)), Q7str7)
-