solution/cw1/matcher.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Thu, 29 Sep 2022 21:10:45 +0100
changeset 877 43460c7b2010
parent 864 b5b1bc0a603b
permissions -rw-r--r--
updated

// CW1

abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp 

// extended Rexps
case class RANGE(s: Set[Char]) extends Rexp
case class PLUS(r: Rexp) extends Rexp
case class OPTIONAL(r: Rexp) extends Rexp
case class NTIMES(r: Rexp, n: Int) extends Rexp 
case class UPTO(r: Rexp, n: Int) extends Rexp
case class FROM(r: Rexp, n: Int) extends Rexp
case class BETWEEN(r: Rexp, n: Int, m: Int) extends Rexp
case class NOT(r: Rexp) extends Rexp

// functions
case class CFUN(f: Char => Boolean) extends Rexp

// abbreviations
def FCHAR(c: Char) = CFUN((a: Char) => a == c)
def FSET(s: Set[Char]) = CFUN((a: Char) => s.contains(a))
val FALL = CFUN(_ => true)

def nullable (r: Rexp) : Boolean = r match {
  case ZERO => false
  case ONE => true
  case CHAR(_) => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_) => true

  case RANGE(_) => false 
  case PLUS(r1) => nullable(r1)
  case OPTIONAL(_) => true
  case NTIMES(r1, i) => if (i == 0) true else nullable(r1)
  case UPTO(_, _) => true
  case FROM(r1, i) => if (i == 0) true else nullable(r1)
  case BETWEEN(r1, i, _) => if (i == 0) true else nullable(r1)
  case NOT(r1) => !nullable(r1)

  case CFUN(f) => false
}


def der (c: Char, r: Rexp) : Rexp = r match {
  case ZERO => ZERO
  case ONE => ZERO
  case CHAR(d) => if (c == d) ONE else ZERO
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  case SEQ(r1, r2) => 
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    else SEQ(der(c, r1), r2)
  case STAR(r1) => SEQ(der(c, r1), STAR(r1))

  case RANGE(s) =>
    if (s.contains(c)) ONE else ZERO 
  case PLUS(r1) => SEQ(der(c, r1), STAR(r1))
  case OPTIONAL(r1) => der(c, r1)
  case NTIMES(r, i) => 
    if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
  case UPTO(r1, i) => 
    if (i == 0) ZERO else SEQ(der(c, r1), UPTO(r1, i - 1)) 
  case FROM(r1, i) =>
    if (i == 0) SEQ(der(c, r1), FROM(r1, 0)) else
    SEQ(der(c, r1), FROM(r1, i - 1))
  case BETWEEN(r1, i, j) => 
    if (i == 0) {
      if (j == 0) ZERO else SEQ(der(c, r1), BETWEEN(r1, 0, j - 1))
    } else SEQ(der(c, r1), BETWEEN(r1, i - 1, j - 1))
  case NOT(r1) => NOT(der(c, r1))

  case CFUN(f) => if (f(c)) ONE else ZERO
}


def simp(r: Rexp) : Rexp = r match {
  case ALT(r1, r2) => (simp(r1), simp(r2)) match {
    case (ZERO, r2s) => r2s
    case (r1s, ZERO) => r1s
    case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s)
  }
  case SEQ(r1, r2) =>  (simp(r1), simp(r2)) match {
    case (ZERO, _) => ZERO
    case (_, ZERO) => ZERO
    case (ONE, r2s) => r2s
    case (r1s, ONE) => r1s
    case (r1s, r2s) => SEQ(r1s, r2s)
  }
  case r => r
}

def ders(s: List[Char], r: Rexp) : Rexp = s match {
  case Nil => r
  case c::s => ders(s, simp(der(c, r)))
}

def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))



val Regex31 = NTIMES(CHAR('a'),3)
val Regex32 = NTIMES(OPTIONAL(CHAR('a')),3)
val Regex33 = UPTO(CHAR('a'),3)
val Regex34 = UPTO(OPTIONAL(CHAR('a')),3)
val Regex35 = BETWEEN(CHAR('a'),3,5)
val Regex36 = BETWEEN(OPTIONAL(CHAR('a')),3,5)
val string31 = ""
val string32 = "a"
val string33 = "aa"
val string34 = "aaa"
val string35 = "aaaa"
val string36 = "aaaaa"
val string37 = "aaaaaa"


println("Question3")
println(matcher(Regex31, string31))
println(matcher(Regex31, string32))
println(matcher(Regex31, string33))
println(matcher(Regex31, string34))
println(matcher(Regex31, string35))
println(matcher(Regex31, string36))
println(matcher(Regex31, string37)); println("")
println(matcher(Regex32, string31))
println(matcher(Regex32, string32))
println(matcher(Regex32, string33))
println(matcher(Regex32, string34))
println(matcher(Regex32, string35))
println(matcher(Regex32, string36))
println(matcher(Regex32, string37)); println("")
println(matcher(Regex33, string31))
println(matcher(Regex33, string32))
println(matcher(Regex33, string33))
println(matcher(Regex33, string34))
println(matcher(Regex33, string35))
println(matcher(Regex33, string36))
println(matcher(Regex33, string37)); println("")
println(matcher(Regex34, string31))
println(matcher(Regex34, string32))
println(matcher(Regex34, string33))
println(matcher(Regex34, string34))
println(matcher(Regex34, string35))
println(matcher(Regex34, string36))
println(matcher(Regex34, string37)); println("")
println(matcher(Regex35, string31))
println(matcher(Regex35, string32))
println(matcher(Regex35, string33))
println(matcher(Regex35, string34))
println(matcher(Regex35, string35))
println(matcher(Regex35, string36))
println(matcher(Regex35, string37)); println("")
println(matcher(Regex36, string31))
println(matcher(Regex36, string32))
println(matcher(Regex36, string33))
println(matcher(Regex36, string34))
println(matcher(Regex36, string35))
println(matcher(Regex36, string36))
println(matcher(Regex36, string37)); println("")


val RegexX = BETWEEN(CHAR('a'), 0, 5)
val stringX = ""
println(matcher(RegexX, stringX))



val str0 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
val str1 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
val str2 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

val matchA = (c:Char) => c == 'a'

val reg_1 = SEQ(SEQ(CFUN(matchA), CFUN(matchA)), CFUN(matchA))
val reg_2 = SEQ(NTIMES(CFUN(matchA), 19), OPTIONAL(CFUN(matchA)))

val reg_1_mod = PLUS(PLUS(reg_1))
val reg_2_mod = PLUS(PLUS(reg_2))


matcher(reg_1_mod, str0)
matcher(reg_1_mod, str1)
matcher(reg_1_mod, str2)
matcher(reg_2_mod, str0)
matcher(reg_2_mod, str1)
matcher(reg_2_mod, str2)





//Q3: matcher test (table)

// a^{3}
val re1 = NTIMES(CHAR('a'), 3)

matcher(re1, "")        //false
matcher(re1, "a")       //false
matcher(re1, "aa")      //false
matcher(re1, "aaa")     //true
matcher(re1, "aaaaa")   //false
matcher(re1, "aaaaaa")  //false

// (a?)^{3}
val re2 = NTIMES(OPTIONAL(CHAR('a')), 3)

matcher(re2, "")        //true
matcher(re2, "a")       //true
matcher(re2, "aa")      //true
matcher(re2, "aaa")     //true
matcher(re2, "aaaaa")   //false
matcher(re2, "aaaaaa")  //false

// (a)^{..3}
val re3 = UPTO((CHAR('a')), 3)

matcher(re3, "")        //true
matcher(re3, "a")       //true
matcher(re3, "aa")      //true
matcher(re3, "aaa")     //true
matcher(re3, "aaaaa")   //false
matcher(re3, "aaaaaa")  //false

// (a?)^{..3}
val re4 = UPTO(OPTIONAL(CHAR('a')), 3)

matcher(re4, "")        //true
matcher(re4, "a")       //true
matcher(re4, "aa")      //true
matcher(re4, "aaa")     //true
matcher(re4, "aaaaa")   //false
matcher(re4, "aaaaaa")  //false

// (a)^{3..5}
val re5 = BETWEEN(CHAR('a'), 3, 5)

matcher(re5, "")        //false
matcher(re5, "a")       //false
matcher(re5, "aa")      //false
matcher(re5, "aaa")     //true
matcher(re5, "aaaaa")   //true
matcher(re5, "aaaaaa")  //false

// (a?)^{3..5}
val re6 = BETWEEN(OPTIONAL(CHAR('a')), 3, 5)

matcher(re6, "")        //true
matcher(re6, "a")       //true
matcher(re6, "aa")      //true
matcher(re6, "aaa")     //true
matcher(re6, "aaaaa")   //true
matcher(re6, "aaaaaa")  //false

//Q5: regular expression for email addresses

val alphaNum = ('a' to 'z').toSet ++ ('0' to '9')
val Q5email = SEQ(
                PLUS(RANGE(alphaNum + '_' + '-' + '.')), 
                SEQ(
                    CHAR('@'), 
                    SEQ(
                        PLUS(RANGE(alphaNum + '-' + '.')), 
                        SEQ(
                            CHAR('.'), 
                            BETWEEN(RANGE(('a' to 'z').toSet + '.'), 2, 6)
                            )
                      )
                )
              )

ders("nicolas.volken@kcl.ac.uk".toList, Q5email)

// Q6
val Q6 = SEQ(CHAR('/'),
            SEQ(CHAR('*'),
                SEQ(
                  
                    NOT(
                      SEQ(
                        STAR(FALL),
                        SEQ(
                          CHAR('*'),
                          SEQ(
                            CHAR('/'),
                            STAR(FALL)
                          )
                        )
                      )
                    )
                  
                    ,
                    SEQ(CHAR('*'),
                    CHAR('/')
                    )
                )
            )
        )

matcher(Q6, "/**/")
matcher(Q6, "/*foobar*/")
matcher(Q6, "/*test*/test*/")
matcher(Q6, "/*test/*test*/")

// Q7

val Q7r1 = SEQ(CHAR('a'), SEQ(CHAR('a'), CHAR('a')))
val Q7r2 = SEQ(BETWEEN(CHAR('a'), 19, 19), OPTIONAL(CHAR('a')))

val Q7str5 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
val Q7str6 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
val Q7str7 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

matcher(PLUS(PLUS(Q7r1)), Q7str5)
matcher(PLUS(PLUS(Q7r2)), Q7str5)

matcher(PLUS(PLUS(Q7r1)), Q7str6)
matcher(PLUS(PLUS(Q7r2)), Q7str6)

matcher(PLUS(PLUS(Q7r1)), Q7str7)
matcher(PLUS(PLUS(Q7r2)), Q7str7)