diff -r 908e4f6cdf7c -r 4d5058706f1b solution/cw1/matcher.sc --- a/solution/cw1/matcher.sc Fri Oct 28 09:08:13 2022 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,325 +0,0 @@ -// CW1 - -abstract class Rexp -case object ZERO extends Rexp -case object ONE extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -// extended Rexps -case class RANGE(s: Set[Char]) extends Rexp -case class PLUS(r: Rexp) extends Rexp -case class OPTIONAL(r: Rexp) extends Rexp -case class NTIMES(r: Rexp, n: Int) extends Rexp -case class UPTO(r: Rexp, n: Int) extends Rexp -case class FROM(r: Rexp, n: Int) extends Rexp -case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp -case class NOT(r: Rexp) extends Rexp - -// functions -case class CFUN(f: Char => Boolean) extends Rexp - -// abbreviations -def FCHAR(c: Char) = CFUN((a: Char) => a == c) -def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a)) -val FALL = CFUN(_ => true) - -def nullable (r: Rexp) : Boolean = r match { - case ZERO => false - case ONE => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true - - case RANGE(_) => false - case PLUS(r1) => nullable(r1) - case OPTIONAL(_) => true - case NTIMES(r1, i) => if (i == 0) true else nullable(r1) - case UPTO(_, _) => true - case FROM(r1, i) => if (i == 0) true else nullable(r1) - case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1) - case NOT(r1) => !nullable(r1) - - case CFUN(f) => false -} - - -def der (c: Char, r: Rexp) : Rexp = r match { - case ZERO => ZERO - case ONE => ZERO - case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r1) => SEQ(der(c, r1), STAR(r1)) - - case RANGE(s) => - if (s.contains(c)) ONE else ZERO - case PLUS(r1) => SEQ(der(c, r1), STAR(r1)) - case OPTIONAL(r1) => der(c, r1) - case NTIMES(r, i) => - if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1)) - case UPTO(r1, i) => - if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1)) - case FROM(r1, i) => - if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else - SEQ(der(c, r1), FROM(r1, i - 1)) - case BETWEEN(r1, i, j) => - if (i == 0) { - if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1)) - } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1)) - case NOT(r1) => NOT(der(c, r1)) - - case CFUN(f) => if (f(c)) ONE else ZERO -} - - -def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) - } - case SEQ(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, _) => ZERO - case (_, ZERO) => ZERO - case (ONE, r2s) => r2s - case (r1s, ONE) => r1s - case (r1s, r2s) => SEQ(r1s, r2s) - } - case r => r -} - -def ders(s: List[Char], r: Rexp) : Rexp = s match { - case Nil => r - case c::s => ders(s, simp(der(c, r))) -} - -def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r)) - - - -val Regex31 = NTIMES(CHAR('a'),3) -val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3) -val Regex33 = UPTO(CHAR('a'),3) -val Regex34 = UPTO(OPTIONAL(CHAR('a')),3) -val Regex35 = BETWEEN(CHAR('a'),3,5) -val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5) -val string31 = "" -val string32 = "a" -val string33 = "aa" -val string34 = "aaa" -val string35 = "aaaa" -val string36 = "aaaaa" -val string37 = "aaaaaa" - - -println("Question3") -println(matcher(Regex31, string31)) -println(matcher(Regex31, string32)) -println(matcher(Regex31, string33)) -println(matcher(Regex31, string34)) -println(matcher(Regex31, string35)) -println(matcher(Regex31, string36)) -println(matcher(Regex31, string37)); println("") -println(matcher(Regex32, string31)) -println(matcher(Regex32, string32)) -println(matcher(Regex32, string33)) -println(matcher(Regex32, string34)) -println(matcher(Regex32, string35)) -println(matcher(Regex32, string36)) -println(matcher(Regex32, string37)); println("") -println(matcher(Regex33, string31)) -println(matcher(Regex33, string32)) -println(matcher(Regex33, string33)) -println(matcher(Regex33, string34)) -println(matcher(Regex33, string35)) -println(matcher(Regex33, string36)) -println(matcher(Regex33, string37)); println("") -println(matcher(Regex34, string31)) -println(matcher(Regex34, string32)) -println(matcher(Regex34, string33)) -println(matcher(Regex34, string34)) -println(matcher(Regex34, string35)) -println(matcher(Regex34, string36)) -println(matcher(Regex34, string37)); println("") -println(matcher(Regex35, string31)) -println(matcher(Regex35, string32)) -println(matcher(Regex35, string33)) -println(matcher(Regex35, string34)) -println(matcher(Regex35, string35)) -println(matcher(Regex35, string36)) -println(matcher(Regex35, string37)); println("") -println(matcher(Regex36, string31)) -println(matcher(Regex36, string32)) -println(matcher(Regex36, string33)) -println(matcher(Regex36, string34)) -println(matcher(Regex36, string35)) -println(matcher(Regex36, string36)) -println(matcher(Regex36, string37)); println("") - - -val RegexX = BETWEEN(CHAR('a'), 0, 5) -val stringX = "" -println(matcher(RegexX, stringX)) - - - -val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" -val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" -val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - -val matchA = (c:Char) => c == 'a' - -val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA)) -val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA))) - -val reg_1_mod = PLUS(PLUS(reg_1)) -val reg_2_mod = PLUS(PLUS(reg_2)) - - -matcher(reg_1_mod, str0) -matcher(reg_1_mod, str1) -matcher(reg_1_mod, str2) -matcher(reg_2_mod, str0) -matcher(reg_2_mod, str1) -matcher(reg_2_mod, str2) - - - - - -//Q3: matcher test (table) - -// a^{3} -val re1 = NTIMES(CHAR('a'), 3) - -matcher(re1, "") //false -matcher(re1, "a") //false -matcher(re1, "aa") //false -matcher(re1, "aaa") //true -matcher(re1, "aaaaa") //false -matcher(re1, "aaaaaa") //false - -// (a?)^{3} -val re2 = NTIMES(OPTIONAL(CHAR('a')), 3) - -matcher(re2, "") //true -matcher(re2, "a") //true -matcher(re2, "aa") //true -matcher(re2, "aaa") //true -matcher(re2, "aaaaa") //false -matcher(re2, "aaaaaa") //false - -// (a)^{..3} -val re3 = UPTO((CHAR('a')), 3) - -matcher(re3, "") //true -matcher(re3, "a") //true -matcher(re3, "aa") //true -matcher(re3, "aaa") //true -matcher(re3, "aaaaa") //false -matcher(re3, "aaaaaa") //false - -// (a?)^{..3} -val re4 = UPTO(OPTIONAL(CHAR('a')), 3) - -matcher(re4, "") //true -matcher(re4, "a") //true -matcher(re4, "aa") //true -matcher(re4, "aaa") //true -matcher(re4, "aaaaa") //false -matcher(re4, "aaaaaa") //false - -// (a)^{3..5} -val re5 = BETWEEN(CHAR('a'), 3, 5) - -matcher(re5, "") //false -matcher(re5, "a") //false -matcher(re5, "aa") //false -matcher(re5, "aaa") //true -matcher(re5, "aaaaa") //true -matcher(re5, "aaaaaa") //false - -// (a?)^{3..5} -val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5) - -matcher(re6, "") //true -matcher(re6, "a") //true -matcher(re6, "aa") //true -matcher(re6, "aaa") //true -matcher(re6, "aaaaa") //true -matcher(re6, "aaaaaa") //false - -//Q5: regular expression for email addresses - -val alphaNum = ('a' to 'z').toSet ++ ('0' to '9') -val Q5email = SEQ( - PLUS(RANGE(alphaNum + '_' + '-' + '.')), - SEQ( - CHAR('@'), - SEQ( - PLUS(RANGE(alphaNum + '-' + '.')), - SEQ( - CHAR('.'), - BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6) - ) - ) - ) - ) - -ders("nicolas.volken@kcl.ac.uk".toList, Q5email) - -// Q6 -val Q6 = SEQ(CHAR('/'), - SEQ(CHAR('*'), - SEQ( - - NOT( - SEQ( - STAR(FALL), - SEQ( - CHAR('*'), - SEQ( - CHAR('/'), - STAR(FALL) - ) - ) - ) - ) - - , - SEQ(CHAR('*'), - CHAR('/') - ) - ) - ) - ) - -matcher(Q6, "/**/") -matcher(Q6, "/*foobar*/") -matcher(Q6, "/*test*/test*/") -matcher(Q6, "/*test/*test*/") - -// Q7 - -val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a'))) -val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a'))) - -val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" -val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" -val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - -matcher(PLUS(PLUS(Q7r1)), Q7str5) -matcher(PLUS(PLUS(Q7r2)), Q7str5) - -matcher(PLUS(PLUS(Q7r1)), Q7str6) -matcher(PLUS(PLUS(Q7r2)), Q7str6) - -matcher(PLUS(PLUS(Q7r1)), Q7str7) -matcher(PLUS(PLUS(Q7r2)), Q7str7) -