diff -r d59bcff69998 -r b5b1bc0a603b solution/cw1/matcher.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/solution/cw1/matcher.sc Wed Dec 15 19:00:01 2021 +0000 @@ -0,0 +1,325 @@ +// CW1 + +abstract class Rexp +case object ZERO extends Rexp +case object ONE extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// extended Rexps +case class RANGE(s: Set[Char]) extends Rexp +case class PLUS(r: Rexp) extends Rexp +case class OPTIONAL(r: Rexp) extends Rexp +case class NTIMES(r: Rexp, n: Int) extends Rexp +case class UPTO(r: Rexp, n: Int) extends Rexp +case class FROM(r: Rexp, n: Int) extends Rexp +case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp +case class NOT(r: Rexp) extends Rexp + +// functions +case class CFUN(f: Char => Boolean) extends Rexp + +// abbreviations +def FCHAR(c: Char) = CFUN((a: Char) => a == c) +def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a)) +val FALL = CFUN(_ => true) + +def nullable (r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true + + case RANGE(_) => false + case PLUS(r1) => nullable(r1) + case OPTIONAL(_) => true + case NTIMES(r1, i) => if (i == 0) true else nullable(r1) + case UPTO(_, _) => true + case FROM(r1, i) => if (i == 0) true else nullable(r1) + case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1) + case NOT(r1) => !nullable(r1) + + case CFUN(f) => false +} + + +def der (c: Char, r: Rexp) : Rexp = r match { + case ZERO => ZERO + case ONE => ZERO + case CHAR(d) => if (c == d) ONE else ZERO + case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) + else SEQ(der(c, r1), r2) + case STAR(r1) => SEQ(der(c, r1), STAR(r1)) + + case RANGE(s) => + if (s.contains(c)) ONE else ZERO + case PLUS(r1) => SEQ(der(c, r1), STAR(r1)) + case OPTIONAL(r1) => der(c, r1) + case NTIMES(r, i) => + if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) + case UPTO(r1, i) => + if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1)) + case FROM(r1, i) => + if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else + SEQ(der(c, r1), FROM(r1, i - 1)) + case BETWEEN(r1, i, j) => + if (i == 0) { + if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1)) + } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1)) + case NOT(r1) => NOT(der(c, r1)) + + case CFUN(f) => if (f(c)) ONE else ZERO +} + + +def simp(r: Rexp) : Rexp = r match { + case ALT(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, r2s) => r2s + case (r1s, ZERO) => r1s + case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + } + case SEQ(r1, r2) => (simp(r1), simp(r2)) match { + case (ZERO, _) => ZERO + case (_, ZERO) => ZERO + case (ONE, r2s) => r2s + case (r1s, ONE) => r1s + case (r1s, r2s) => SEQ(r1s, r2s) + } + case r => r +} + +def ders(s: List[Char], r: Rexp) : Rexp = s match { + case Nil => r + case c::s => ders(s, simp(der(c, r))) +} + +def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) + + + +val Regex31 = NTIMES(CHAR('a'),3) +val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3) +val Regex33 = UPTO(CHAR('a'),3) +val Regex34 = UPTO(OPTIONAL(CHAR('a')),3) +val Regex35 = BETWEEN(CHAR('a'),3,5) +val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5) +val string31 = "" +val string32 = "a" +val string33 = "aa" +val string34 = "aaa" +val string35 = "aaaa" +val string36 = "aaaaa" +val string37 = "aaaaaa" + + +println("Question3") +println(matcher(Regex31, string31)) +println(matcher(Regex31, string32)) +println(matcher(Regex31, string33)) +println(matcher(Regex31, string34)) +println(matcher(Regex31, string35)) +println(matcher(Regex31, string36)) +println(matcher(Regex31, string37)); println("") +println(matcher(Regex32, string31)) +println(matcher(Regex32, string32)) +println(matcher(Regex32, string33)) +println(matcher(Regex32, string34)) +println(matcher(Regex32, string35)) +println(matcher(Regex32, string36)) +println(matcher(Regex32, string37)); println("") +println(matcher(Regex33, string31)) +println(matcher(Regex33, string32)) +println(matcher(Regex33, string33)) +println(matcher(Regex33, string34)) +println(matcher(Regex33, string35)) +println(matcher(Regex33, string36)) +println(matcher(Regex33, string37)); println("") +println(matcher(Regex34, string31)) +println(matcher(Regex34, string32)) +println(matcher(Regex34, string33)) +println(matcher(Regex34, string34)) +println(matcher(Regex34, string35)) +println(matcher(Regex34, string36)) +println(matcher(Regex34, string37)); println("") +println(matcher(Regex35, string31)) +println(matcher(Regex35, string32)) +println(matcher(Regex35, string33)) +println(matcher(Regex35, string34)) +println(matcher(Regex35, string35)) +println(matcher(Regex35, string36)) +println(matcher(Regex35, string37)); println("") +println(matcher(Regex36, string31)) +println(matcher(Regex36, string32)) +println(matcher(Regex36, string33)) +println(matcher(Regex36, string34)) +println(matcher(Regex36, string35)) +println(matcher(Regex36, string36)) +println(matcher(Regex36, string37)); println("") + + +val RegexX = BETWEEN(CHAR('a'), 0, 5) +val stringX = "" +println(matcher(RegexX, stringX)) + + + +val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + +val matchA = (c:Char) => c == 'a' + +val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA)) +val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA))) + +val reg_1_mod = PLUS(PLUS(reg_1)) +val reg_2_mod = PLUS(PLUS(reg_2)) + + +matcher(reg_1_mod, str0) +matcher(reg_1_mod, str1) +matcher(reg_1_mod, str2) +matcher(reg_2_mod, str0) +matcher(reg_2_mod, str1) +matcher(reg_2_mod, str2) + + + + + +//Q3: matcher test (table) + +// a^{3} +val re1 = NTIMES(CHAR('a'), 3) + +matcher(re1, "") //false +matcher(re1, "a") //false +matcher(re1, "aa") //false +matcher(re1, "aaa") //true +matcher(re1, "aaaaa") //false +matcher(re1, "aaaaaa") //false + +// (a?)^{3} +val re2 = NTIMES(OPTIONAL(CHAR('a')), 3) + +matcher(re2, "") //true +matcher(re2, "a") //true +matcher(re2, "aa") //true +matcher(re2, "aaa") //true +matcher(re2, "aaaaa") //false +matcher(re2, "aaaaaa") //false + +// (a)^{..3} +val re3 = UPTO((CHAR('a')), 3) + +matcher(re3, "") //true +matcher(re3, "a") //true +matcher(re3, "aa") //true +matcher(re3, "aaa") //true +matcher(re3, "aaaaa") //false +matcher(re3, "aaaaaa") //false + +// (a?)^{..3} +val re4 = UPTO(OPTIONAL(CHAR('a')), 3) + +matcher(re4, "") //true +matcher(re4, "a") //true +matcher(re4, "aa") //true +matcher(re4, "aaa") //true +matcher(re4, "aaaaa") //false +matcher(re4, "aaaaaa") //false + +// (a)^{3..5} +val re5 = BETWEEN(CHAR('a'), 3, 5) + +matcher(re5, "") //false +matcher(re5, "a") //false +matcher(re5, "aa") //false +matcher(re5, "aaa") //true +matcher(re5, "aaaaa") //true +matcher(re5, "aaaaaa") //false + +// (a?)^{3..5} +val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5) + +matcher(re6, "") //true +matcher(re6, "a") //true +matcher(re6, "aa") //true +matcher(re6, "aaa") //true +matcher(re6, "aaaaa") //true +matcher(re6, "aaaaaa") //false + +//Q5: regular expression for email addresses + +val alphaNum = ('a' to 'z').toSet ++ ('0' to '9') +val Q5email = SEQ( + PLUS(RANGE(alphaNum + '_' + '-' + '.')), + SEQ( + CHAR('@'), + SEQ( + PLUS(RANGE(alphaNum + '-' + '.')), + SEQ( + CHAR('.'), + BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6) + ) + ) + ) + ) + +ders("nicolas.volken@kcl.ac.uk".toList, Q5email) + +// Q6 +val Q6 = SEQ(CHAR('/'), + SEQ(CHAR('*'), + SEQ( + + NOT( + SEQ( + STAR(FALL), + SEQ( + CHAR('*'), + SEQ( + CHAR('/'), + STAR(FALL) + ) + ) + ) + ) + + , + SEQ(CHAR('*'), + CHAR('/') + ) + ) + ) + ) + +matcher(Q6, "/**/") +matcher(Q6, "/*foobar*/") +matcher(Q6, "/*test*/test*/") +matcher(Q6, "/*test/*test*/") + +// Q7 + +val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))) +val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a'))) + +val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" +val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + +matcher(PLUS(PLUS(Q7r1)), Q7str5) +matcher(PLUS(PLUS(Q7r2)), Q7str5) + +matcher(PLUS(PLUS(Q7r1)), Q7str6) +matcher(PLUS(PLUS(Q7r2)), Q7str6) + +matcher(PLUS(PLUS(Q7r1)), Q7str7) +matcher(PLUS(PLUS(Q7r2)), Q7str7) +