24 def nullable (r: Rexp) : Boolean = r match { |
24 def nullable (r: Rexp) : Boolean = r match { |
25 case NULL => false |
25 case NULL => false |
26 case EMPTY => true |
26 case EMPTY => true |
27 case CHAR(_) => false |
27 case CHAR(_) => false |
28 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
28 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
29 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
29 case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1) |
30 case STAR(_) => true |
30 case STAR(_) => true |
31 case NOT(r) => !(nullable(r)) |
31 case NOT(r) => !(nullable(r)) |
32 } |
32 } |
33 |
33 |
34 // tests whether a regular expression |
34 // tests whether a regular expression |
37 case NULL => true |
37 case NULL => true |
38 case EMPTY => false |
38 case EMPTY => false |
39 case CHAR(_) => false |
39 case CHAR(_) => false |
40 case ALT(r1, r2) => zeroable(r1) && zeroable(r2) |
40 case ALT(r1, r2) => zeroable(r1) && zeroable(r2) |
41 case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1) |
41 case SEQ(r1, r2) => if (nullable(r1)) (zeroable(r1) && zeroable(r2)) else zeroable(r1) |
42 //zeroable(r1) || zeroable(r2) |
42 //case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) |
43 case STAR(_) => false |
43 case STAR(_) => false |
44 case NOT(r) => !(zeroable(r)) |
44 case NOT(r) => !(zeroable(r)) |
45 } |
45 } |
46 |
46 |
47 |
47 |
88 val NONZERODIGITS = RANGE("123456789".toList) |
88 val NONZERODIGITS = RANGE("123456789".toList) |
89 |
89 |
90 val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) |
90 val IDENT = SEQ(LETTER, STAR(ALT(LETTER,DIGITS))) |
91 val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") |
91 val NUMBER = ALT(SEQ(NONZERODIGITS, STAR(DIGITS)), "0") |
92 val WHITESPACE = RANGE(" \n".toList) |
92 val WHITESPACE = RANGE(" \n".toList) |
|
93 val WHITESPACES = PLUS(WHITESPACE) |
93 |
94 |
94 val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE) |
95 val ALL = ALT(ALT(LETTER, DIGITS), WHITESPACE) |
95 |
96 |
96 val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") |
97 val COMMENT = SEQ(SEQ("/*", NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL)))), "*/") |
97 |
98 |
100 case object T_WHITESPACE extends Token |
101 case object T_WHITESPACE extends Token |
101 case class T_IDENT(s: String) extends Token |
102 case class T_IDENT(s: String) extends Token |
102 case class T_OP(s: String) extends Token |
103 case class T_OP(s: String) extends Token |
103 case class T_NUM(n: Int) extends Token |
104 case class T_NUM(n: Int) extends Token |
104 case class T_KEYWORD(s: String) extends Token |
105 case class T_KEYWORD(s: String) extends Token |
|
106 case object T_COMMENT extends Token |
105 |
107 |
106 |
108 |
107 // an example list of rules |
109 // an example list of rules |
108 type Rule = (Rexp, List[Char] => Token) |
110 type Rule = (Rexp, List[Char] => Token) |
109 |
111 |
112 ("then", (s) => T_KEYWORD(s.mkString)), |
114 ("then", (s) => T_KEYWORD(s.mkString)), |
113 ("else", (s) => T_KEYWORD(s.mkString)), |
115 ("else", (s) => T_KEYWORD(s.mkString)), |
114 ("+", (s) => T_OP(s.mkString)), |
116 ("+", (s) => T_OP(s.mkString)), |
115 (IDENT, (s) => T_IDENT(s.mkString)), |
117 (IDENT, (s) => T_IDENT(s.mkString)), |
116 (NUMBER, (s) => T_NUM(s.mkString.toInt)), |
118 (NUMBER, (s) => T_NUM(s.mkString.toInt)), |
117 (WHITESPACE, (s) => T_WHITESPACE)) |
119 (WHITESPACES, (s) => T_WHITESPACE)) |
118 |
120 |
119 |
121 |
120 def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) |
122 def error (s: String) = throw new IllegalArgumentException ("Could not lex " + s) |
121 |
123 |
122 def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = |
124 def munch(r: Rexp, action: List[Char] => Token, s: List[Char], t: List[Char]) : Option[(List[Char], Token)] = |
138 case _ => lex_one(rs, s) match { |
140 case _ => lex_one(rs, s) match { |
139 case (rest, t) => t :: lex_all(rs, rest) |
141 case (rest, t) => t :: lex_all(rs, rest) |
140 } |
142 } |
141 } |
143 } |
142 |
144 |
|
145 val rules: List[Rule]= |
|
146 List(("if", (s) => T_KEYWORD(s.mkString)), |
|
147 ("then", (s) => T_KEYWORD(s.mkString)), |
|
148 ("else", (s) => T_KEYWORD(s.mkString)), |
|
149 ("+", (s) => T_OP(s.mkString)), |
|
150 (IDENT, (s) => T_IDENT(s.mkString)), |
|
151 (NUMBER, (s) => T_NUM(s.mkString.toInt)), |
|
152 (WHITESPACES, (s) => T_WHITESPACE), |
|
153 (COMMENT, (s) => T_COMMENT)) |
|
154 |
|
155 println(lex_all(rules, "/*ifff if */ hhjj /*34 */".toList)) |
143 |
156 |
144 |
157 |
145 println(lex_all(rules, "if true then 42 else +".toList)) |
158 munch(COMMENT, (s) => T_COMMENT , "/*ifff if */ hhjj /*34 */".toList, Nil) |
|
159 val COMMENT2 = NOT(SEQ(SEQ(STAR(ALL), "*/"), STAR(ALL))) |
|
160 |
|
161 der('/', COMMENT) |
|
162 zeroable(der('/', COMMENT)) |
|
163 zeroable(der('a', COMMENT2)) |
|
164 |
|
165 matcher(COMMENT2, "ifff if 34") |
|
166 munch(COMMENT2, "ifff if 34".toList, Nil) |
|
167 starts_with(COMMENT2, 'i') |
|
168 lex_all(regs, "ifff if 34".toList) |
|
169 lex_all(regs, "ifff $ if 34".toList) |
|
170 |
|
171 println(lex_all(rules, "/* if true then */ then 42 else +".toList)) |
|
172 println(lex_all(rules, "if true then then 42 else +".toList)) |
146 println(lex_all(rules, "ifff if 34 34".toList)) |
173 println(lex_all(rules, "ifff if 34 34".toList)) |
147 println(lex_all(rules, "ifff $ if 34".toList)) |
174 println(lex_all(rules, "ifff $ if 34".toList)) |
148 |
175 |
149 |
176 |