--- a/solution/cw1/matcher.sc Fri Oct 28 09:08:13 2022 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,325 +0,0 @@
-// CW1
-
-abstract class Rexp
-case object ZERO extends Rexp
-case object ONE extends Rexp
-case class CHAR(c: Char) extends Rexp
-case class ALT(r1: Rexp, r2: Rexp) extends Rexp
-case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
-case class STAR(r: Rexp) extends Rexp
-
-// extended Rexps
-case class RANGE(s: Set[Char]) extends Rexp
-case class PLUS(r: Rexp) extends Rexp
-case class OPTIONAL(r: Rexp) extends Rexp
-case class NTIMES(r: Rexp, n: Int) extends Rexp
-case class UPTO(r: Rexp, n: Int) extends Rexp
-case class FROM(r: Rexp, n: Int) extends Rexp
-case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp
-case class NOT(r: Rexp) extends Rexp
-
-// functions
-case class CFUN(f: Char => Boolean) extends Rexp
-
-// abbreviations
-def FCHAR(c: Char) = CFUN((a: Char) => a == c)
-def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a))
-val FALL = CFUN(_ => true)
-
-def nullable (r: Rexp) : Boolean = r match {
- case ZERO => false
- case ONE => true
- case CHAR(_) => false
- case ALT(r1, r2) => nullable(r1) || nullable(r2)
- case SEQ(r1, r2) => nullable(r1) && nullable(r2)
- case STAR(_) => true
-
- case RANGE(_) => false
- case PLUS(r1) => nullable(r1)
- case OPTIONAL(_) => true
- case NTIMES(r1, i) => if (i == 0) true else nullable(r1)
- case UPTO(_, _) => true
- case FROM(r1, i) => if (i == 0) true else nullable(r1)
- case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1)
- case NOT(r1) => !nullable(r1)
-
- case CFUN(f) => false
-}
-
-
-def der (c: Char, r: Rexp) : Rexp = r match {
- case ZERO => ZERO
- case ONE => ZERO
- case CHAR(d) => if (c == d) ONE else ZERO
- case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
- case SEQ(r1, r2) =>
- if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
- else SEQ(der(c, r1), r2)
- case STAR(r1) => SEQ(der(c, r1), STAR(r1))
-
- case RANGE(s) =>
- if (s.contains(c)) ONE else ZERO
- case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
- case OPTIONAL(r1) => der(c, r1)
- case NTIMES(r, i) =>
- if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
- case UPTO(r1, i) =>
- if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1))
- case FROM(r1, i) =>
- if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else
- SEQ(der(c, r1), FROM(r1, i - 1))
- case BETWEEN(r1, i, j) =>
- if (i == 0) {
- if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1))
- } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1))
- case NOT(r1) => NOT(der(c, r1))
-
- case CFUN(f) => if (f(c)) ONE else ZERO
-}
-
-
-def simp(r: Rexp) : Rexp = r match {
- case ALT(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, r2s) => r2s
- case (r1s, ZERO) => r1s
- case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
- }
- case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
- case (ZERO, _) => ZERO
- case (_, ZERO) => ZERO
- case (ONE, r2s) => r2s
- case (r1s, ONE) => r1s
- case (r1s, r2s) => SEQ(r1s, r2s)
- }
- case r => r
-}
-
-def ders(s: List[Char], r: Rexp) : Rexp = s match {
- case Nil => r
- case c::s => ders(s, simp(der(c, r)))
-}
-
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
-
-
-
-val Regex31 = NTIMES(CHAR('a'),3)
-val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3)
-val Regex33 = UPTO(CHAR('a'),3)
-val Regex34 = UPTO(OPTIONAL(CHAR('a')),3)
-val Regex35 = BETWEEN(CHAR('a'),3,5)
-val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5)
-val string31 = ""
-val string32 = "a"
-val string33 = "aa"
-val string34 = "aaa"
-val string35 = "aaaa"
-val string36 = "aaaaa"
-val string37 = "aaaaaa"
-
-
-println("Question3")
-println(matcher(Regex31, string31))
-println(matcher(Regex31, string32))
-println(matcher(Regex31, string33))
-println(matcher(Regex31, string34))
-println(matcher(Regex31, string35))
-println(matcher(Regex31, string36))
-println(matcher(Regex31, string37)); println("")
-println(matcher(Regex32, string31))
-println(matcher(Regex32, string32))
-println(matcher(Regex32, string33))
-println(matcher(Regex32, string34))
-println(matcher(Regex32, string35))
-println(matcher(Regex32, string36))
-println(matcher(Regex32, string37)); println("")
-println(matcher(Regex33, string31))
-println(matcher(Regex33, string32))
-println(matcher(Regex33, string33))
-println(matcher(Regex33, string34))
-println(matcher(Regex33, string35))
-println(matcher(Regex33, string36))
-println(matcher(Regex33, string37)); println("")
-println(matcher(Regex34, string31))
-println(matcher(Regex34, string32))
-println(matcher(Regex34, string33))
-println(matcher(Regex34, string34))
-println(matcher(Regex34, string35))
-println(matcher(Regex34, string36))
-println(matcher(Regex34, string37)); println("")
-println(matcher(Regex35, string31))
-println(matcher(Regex35, string32))
-println(matcher(Regex35, string33))
-println(matcher(Regex35, string34))
-println(matcher(Regex35, string35))
-println(matcher(Regex35, string36))
-println(matcher(Regex35, string37)); println("")
-println(matcher(Regex36, string31))
-println(matcher(Regex36, string32))
-println(matcher(Regex36, string33))
-println(matcher(Regex36, string34))
-println(matcher(Regex36, string35))
-println(matcher(Regex36, string36))
-println(matcher(Regex36, string37)); println("")
-
-
-val RegexX = BETWEEN(CHAR('a'), 0, 5)
-val stringX = ""
-println(matcher(RegexX, stringX))
-
-
-
-val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-
-val matchA = (c:Char) => c == 'a'
-
-val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA))
-val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA)))
-
-val reg_1_mod = PLUS(PLUS(reg_1))
-val reg_2_mod = PLUS(PLUS(reg_2))
-
-
-matcher(reg_1_mod, str0)
-matcher(reg_1_mod, str1)
-matcher(reg_1_mod, str2)
-matcher(reg_2_mod, str0)
-matcher(reg_2_mod, str1)
-matcher(reg_2_mod, str2)
-
-
-
-
-
-//Q3: matcher test (table)
-
-// a^{3}
-val re1 = NTIMES(CHAR('a'), 3)
-
-matcher(re1, "") //false
-matcher(re1, "a") //false
-matcher(re1, "aa") //false
-matcher(re1, "aaa") //true
-matcher(re1, "aaaaa") //false
-matcher(re1, "aaaaaa") //false
-
-// (a?)^{3}
-val re2 = NTIMES(OPTIONAL(CHAR('a')), 3)
-
-matcher(re2, "") //true
-matcher(re2, "a") //true
-matcher(re2, "aa") //true
-matcher(re2, "aaa") //true
-matcher(re2, "aaaaa") //false
-matcher(re2, "aaaaaa") //false
-
-// (a)^{..3}
-val re3 = UPTO((CHAR('a')), 3)
-
-matcher(re3, "") //true
-matcher(re3, "a") //true
-matcher(re3, "aa") //true
-matcher(re3, "aaa") //true
-matcher(re3, "aaaaa") //false
-matcher(re3, "aaaaaa") //false
-
-// (a?)^{..3}
-val re4 = UPTO(OPTIONAL(CHAR('a')), 3)
-
-matcher(re4, "") //true
-matcher(re4, "a") //true
-matcher(re4, "aa") //true
-matcher(re4, "aaa") //true
-matcher(re4, "aaaaa") //false
-matcher(re4, "aaaaaa") //false
-
-// (a)^{3..5}
-val re5 = BETWEEN(CHAR('a'), 3, 5)
-
-matcher(re5, "") //false
-matcher(re5, "a") //false
-matcher(re5, "aa") //false
-matcher(re5, "aaa") //true
-matcher(re5, "aaaaa") //true
-matcher(re5, "aaaaaa") //false
-
-// (a?)^{3..5}
-val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5)
-
-matcher(re6, "") //true
-matcher(re6, "a") //true
-matcher(re6, "aa") //true
-matcher(re6, "aaa") //true
-matcher(re6, "aaaaa") //true
-matcher(re6, "aaaaaa") //false
-
-//Q5: regular expression for email addresses
-
-val alphaNum = ('a' to 'z').toSet ++ ('0' to '9')
-val Q5email = SEQ(
- PLUS(RANGE(alphaNum + '_' + '-' + '.')),
- SEQ(
- CHAR('@'),
- SEQ(
- PLUS(RANGE(alphaNum + '-' + '.')),
- SEQ(
- CHAR('.'),
- BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6)
- )
- )
- )
- )
-
-ders("nicolas.volken@kcl.ac.uk".toList, Q5email)
-
-// Q6
-val Q6 = SEQ(CHAR('/'),
- SEQ(CHAR('*'),
- SEQ(
-
- NOT(
- SEQ(
- STAR(FALL),
- SEQ(
- CHAR('*'),
- SEQ(
- CHAR('/'),
- STAR(FALL)
- )
- )
- )
- )
-
- ,
- SEQ(CHAR('*'),
- CHAR('/')
- )
- )
- )
- )
-
-matcher(Q6, "/**/")
-matcher(Q6, "/*foobar*/")
-matcher(Q6, "/*test*/test*/")
-matcher(Q6, "/*test/*test*/")
-
-// Q7
-
-val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
-val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a')))
-
-val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-
-matcher(PLUS(PLUS(Q7r1)), Q7str5)
-matcher(PLUS(PLUS(Q7r2)), Q7str5)
-
-matcher(PLUS(PLUS(Q7r1)), Q7str6)
-matcher(PLUS(PLUS(Q7r2)), Q7str6)
-
-matcher(PLUS(PLUS(Q7r1)), Q7str7)
-matcher(PLUS(PLUS(Q7r2)), Q7str7)
-