--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/solutions/cw1/matcher.sc Fri Nov 04 12:07:40 2022 +0000
@@ -0,0 +1,325 @@
+// CW1
+
+abstract class Rexp
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+case class STAR(r: Rexp) extends Rexp
+
+// extended Rexps
+case class RANGE(s: Set[Char]) extends Rexp
+case class PLUS(r: Rexp) extends Rexp
+case class OPTIONAL(r: Rexp) extends Rexp
+case class NTIMES(r: Rexp, n: Int) extends Rexp
+case class UPTO(r: Rexp, n: Int) extends Rexp
+case class FROM(r: Rexp, n: Int) extends Rexp
+case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp
+case class NOT(r: Rexp) extends Rexp
+
+// functions
+case class CFUN(f: Char => Boolean) extends Rexp
+
+// abbreviations
+def FCHAR(c: Char) = CFUN((a: Char) => a == c)
+def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a))
+val FALL = CFUN(_ => true)
+
+def nullable (r: Rexp) : Boolean = r match {
+ case ZERO => false
+ case ONE => true
+ case CHAR(_) => false
+ case ALT(r1, r2) => nullable(r1) || nullable(r2)
+ case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ case STAR(_) => true
+
+ case RANGE(_) => false
+ case PLUS(r1) => nullable(r1)
+ case OPTIONAL(_) => true
+ case NTIMES(r1, i) => if (i == 0) true else nullable(r1)
+ case UPTO(_, _) => true
+ case FROM(r1, i) => if (i == 0) true else nullable(r1)
+ case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1)
+ case NOT(r1) => !nullable(r1)
+
+ case CFUN(f) => false
+}
+
+
+def der (c: Char, r: Rexp) : Rexp = r match {
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
+ case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+ case SEQ(r1, r2) =>
+ if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+ else SEQ(der(c, r1), r2)
+ case STAR(r1) => SEQ(der(c, r1), STAR(r1))
+
+ case RANGE(s) =>
+ if (s.contains(c)) ONE else ZERO
+ case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
+ case OPTIONAL(r1) => der(c, r1)
+ case NTIMES(r, i) =>
+ if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
+ case UPTO(r1, i) =>
+ if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1))
+ case FROM(r1, i) =>
+ if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else
+ SEQ(der(c, r1), FROM(r1, i - 1))
+ case BETWEEN(r1, i, j) =>
+ if (i == 0) {
+ if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1))
+ } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1))
+ case NOT(r1) => NOT(der(c, r1))
+
+ case CFUN(f) => if (f(c)) ONE else ZERO
+}
+
+
+def simp(r: Rexp) : Rexp = r match {
+ case ALT(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
+ case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
+ }
+ case SEQ(r1, r2) => (simp(r1), simp(r2)) match {
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
+ case (r1s, r2s) => SEQ(r1s, r2s)
+ }
+ case r => r
+}
+
+def ders(s: List[Char], r: Rexp) : Rexp = s match {
+ case Nil => r
+ case c::s => ders(s, simp(der(c, r)))
+}
+
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+
+
+val Regex31 = NTIMES(CHAR('a'),3)
+val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3)
+val Regex33 = UPTO(CHAR('a'),3)
+val Regex34 = UPTO(OPTIONAL(CHAR('a')),3)
+val Regex35 = BETWEEN(CHAR('a'),3,5)
+val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5)
+val string31 = ""
+val string32 = "a"
+val string33 = "aa"
+val string34 = "aaa"
+val string35 = "aaaa"
+val string36 = "aaaaa"
+val string37 = "aaaaaa"
+
+
+println("Question3")
+println(matcher(Regex31, string31))
+println(matcher(Regex31, string32))
+println(matcher(Regex31, string33))
+println(matcher(Regex31, string34))
+println(matcher(Regex31, string35))
+println(matcher(Regex31, string36))
+println(matcher(Regex31, string37)); println("")
+println(matcher(Regex32, string31))
+println(matcher(Regex32, string32))
+println(matcher(Regex32, string33))
+println(matcher(Regex32, string34))
+println(matcher(Regex32, string35))
+println(matcher(Regex32, string36))
+println(matcher(Regex32, string37)); println("")
+println(matcher(Regex33, string31))
+println(matcher(Regex33, string32))
+println(matcher(Regex33, string33))
+println(matcher(Regex33, string34))
+println(matcher(Regex33, string35))
+println(matcher(Regex33, string36))
+println(matcher(Regex33, string37)); println("")
+println(matcher(Regex34, string31))
+println(matcher(Regex34, string32))
+println(matcher(Regex34, string33))
+println(matcher(Regex34, string34))
+println(matcher(Regex34, string35))
+println(matcher(Regex34, string36))
+println(matcher(Regex34, string37)); println("")
+println(matcher(Regex35, string31))
+println(matcher(Regex35, string32))
+println(matcher(Regex35, string33))
+println(matcher(Regex35, string34))
+println(matcher(Regex35, string35))
+println(matcher(Regex35, string36))
+println(matcher(Regex35, string37)); println("")
+println(matcher(Regex36, string31))
+println(matcher(Regex36, string32))
+println(matcher(Regex36, string33))
+println(matcher(Regex36, string34))
+println(matcher(Regex36, string35))
+println(matcher(Regex36, string36))
+println(matcher(Regex36, string37)); println("")
+
+
+val RegexX = BETWEEN(CHAR('a'), 0, 5)
+val stringX = ""
+println(matcher(RegexX, stringX))
+
+
+
+val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+
+val matchA = (c:Char) => c == 'a'
+
+val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA))
+val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA)))
+
+val reg_1_mod = PLUS(PLUS(reg_1))
+val reg_2_mod = PLUS(PLUS(reg_2))
+
+
+matcher(reg_1_mod, str0)
+matcher(reg_1_mod, str1)
+matcher(reg_1_mod, str2)
+matcher(reg_2_mod, str0)
+matcher(reg_2_mod, str1)
+matcher(reg_2_mod, str2)
+
+
+
+
+
+//Q3: matcher test (table)
+
+// a^{3}
+val re1 = NTIMES(CHAR('a'), 3)
+
+matcher(re1, "") //false
+matcher(re1, "a") //false
+matcher(re1, "aa") //false
+matcher(re1, "aaa") //true
+matcher(re1, "aaaaa") //false
+matcher(re1, "aaaaaa") //false
+
+// (a?)^{3}
+val re2 = NTIMES(OPTIONAL(CHAR('a')), 3)
+
+matcher(re2, "") //true
+matcher(re2, "a") //true
+matcher(re2, "aa") //true
+matcher(re2, "aaa") //true
+matcher(re2, "aaaaa") //false
+matcher(re2, "aaaaaa") //false
+
+// (a)^{..3}
+val re3 = UPTO((CHAR('a')), 3)
+
+matcher(re3, "") //true
+matcher(re3, "a") //true
+matcher(re3, "aa") //true
+matcher(re3, "aaa") //true
+matcher(re3, "aaaaa") //false
+matcher(re3, "aaaaaa") //false
+
+// (a?)^{..3}
+val re4 = UPTO(OPTIONAL(CHAR('a')), 3)
+
+matcher(re4, "") //true
+matcher(re4, "a") //true
+matcher(re4, "aa") //true
+matcher(re4, "aaa") //true
+matcher(re4, "aaaaa") //false
+matcher(re4, "aaaaaa") //false
+
+// (a)^{3..5}
+val re5 = BETWEEN(CHAR('a'), 3, 5)
+
+matcher(re5, "") //false
+matcher(re5, "a") //false
+matcher(re5, "aa") //false
+matcher(re5, "aaa") //true
+matcher(re5, "aaaaa") //true
+matcher(re5, "aaaaaa") //false
+
+// (a?)^{3..5}
+val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5)
+
+matcher(re6, "") //true
+matcher(re6, "a") //true
+matcher(re6, "aa") //true
+matcher(re6, "aaa") //true
+matcher(re6, "aaaaa") //true
+matcher(re6, "aaaaaa") //false
+
+//Q5: regular expression for email addresses
+
+val alphaNum = ('a' to 'z').toSet ++ ('0' to '9')
+val Q5email = SEQ(
+ PLUS(RANGE(alphaNum + '_' + '-' + '.')),
+ SEQ(
+ CHAR('@'),
+ SEQ(
+ PLUS(RANGE(alphaNum + '-' + '.')),
+ SEQ(
+ CHAR('.'),
+ BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6)
+ )
+ )
+ )
+ )
+
+ders("nicolas.volken@kcl.ac.uk".toList, Q5email)
+
+// Q6
+val Q6 = SEQ(CHAR('/'),
+ SEQ(CHAR('*'),
+ SEQ(
+
+ NOT(
+ SEQ(
+ STAR(FALL),
+ SEQ(
+ CHAR('*'),
+ SEQ(
+ CHAR('/'),
+ STAR(FALL)
+ )
+ )
+ )
+ )
+
+ ,
+ SEQ(CHAR('*'),
+ CHAR('/')
+ )
+ )
+ )
+ )
+
+matcher(Q6, "/**/")
+matcher(Q6, "/*foobar*/")
+matcher(Q6, "/*test*/test*/")
+matcher(Q6, "/*test/*test*/")
+
+// Q7
+
+val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
+val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a')))
+
+val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+
+matcher(PLUS(PLUS(Q7r1)), Q7str5)
+matcher(PLUS(PLUS(Q7r2)), Q7str5)
+
+matcher(PLUS(PLUS(Q7r1)), Q7str6)
+matcher(PLUS(PLUS(Q7r2)), Q7str6)
+
+matcher(PLUS(PLUS(Q7r1)), Q7str7)
+matcher(PLUS(PLUS(Q7r2)), Q7str7)
+