-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRegexToDfa.java
305 lines (260 loc) · 9.07 KB
/
RegexToDfa.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
package Automata_DFA;
import java.util.*;
public class RegexToDfa {
/** Followpos sets taken from the syntax tree; createDFA reads entry p - 1 for position p. */
private static Set<Integer>[] followPos;
/** Root node of the syntax tree built from the regex. */
private static Node root;
/** DFA states discovered so far; cleared and refilled on every createDFA() call. */
private static Set<State> DStates;
/** Distinct symbols collected from the regex by getSymbols (operators only when escaped). */
private static Set<String> input;
/**
* Maps each position number (starting at 1) to the symbol found at that position.
* Every occurrence of a symbol gets its own number, so a repeated character
* appears under several keys. The value is a single character, or a
* two-character string such as "\*" when an operator is preceded by a backslash.
*/
private static HashMap<Integer, String> symbNum;
/**
* Program entry point; delegates all work to initialize().
*
* @param args command-line arguments (not used)
*/
public static void main(String[] args) {
initialize();
}
/**
 * Runs the interactive session on standard input.
 *
 * Steps: read the alphabet and the regex, reject the regex when it uses a
 * letter outside the alphabet, collect the regex symbols, build the syntax
 * tree (root and followpos), then ask how many words to show and print that
 * many words accepted by the regex.
 */
public static void initialize() {
    DStates = new HashSet<>();
    input = new HashSet<>();
    // try-with-resources closes the scanner on every exit path, including the
    // early return taken when the regex does not fit the alphabet.
    try (Scanner in = new Scanner(System.in)) {
        String language = getLanguage(in);
        String regex = getRegex(in);
        if (!checkRegexToLanguage(language, regex)) {
            System.out.println("Its not matching regex with language");
            return;
        }
        getSymbols(regex);

        // Hand the regex to SyntaxTree and keep the two results createDFA needs.
        SyntaxTree st = new SyntaxTree(regex);
        root = st.getRoot();           // root of the syntax tree
        followPos = st.getFollowPos(); // followpos sets of the syntax tree

        // Words are generated over the symbols that occur in the regex,
        // not over the full alphabet the user typed.
        String newLanguage = newLanguage();
        int repeat = getRepeat(in);
        generateAllPossibleWord(newLanguage, repeat);
        // To test a single user-supplied word instead, read it with getWord(in)
        // and pass it to isWordMatch(word, newLanguage).
    }
}
/**
 * Tells whether every literal character of the regex belongs to the alphabet.
 *
 * The characters ( ) * + | # are skipped. Every other character must occur
 * in {@code language} at index 1 or later; index 0 is never consulted.
 *
 * @param language alphabet string whose first character is ignored
 * @param regex    regular expression to inspect
 * @return {@code true} when no literal character of the regex is missing
 *         from the alphabet
 */
private static boolean checkRegexToLanguage(String language, String regex) {
    for (char symbol : regex.toCharArray()) {
        switch (symbol) {
            case '(':
            case ')':
            case '*':
            case '+':
            case '|':
            case '#':
                // Not a letter: nothing to look up.
                break;
            default:
                // Search from index 1 so the leading character is skipped.
                if (language.indexOf(symbol, 1) < 0) {
                    return false;
                }
        }
    }
    return true;
}
/**
 * Prompts for a word and reads it from the scanner.
 *
 * @param in scanner to read from
 * @return the next full line of input
 */
private static String getWord(Scanner in) {
    System.out.print("Please type string which do you want to check: ");
    return in.nextLine();
}
/**
 * Prints whether the regex accepts the given word, prefixed with an escape
 * sequence ("[32m" on acceptance, "[31m" on rejection).
 *
 * @param value    word to test with checkString
 * @param language not used by this method
 */
private static void isWordMatch(String value, String language) {
    final char escape = (char) 27;
    String report = checkString(value)
            ? escape + "[32m" + "this string is acceptable by the regex!"
            : escape + "[31m" + "this string is not acceptable by the regex!";
    System.out.println(report);
}
/**
 * Prints up to {@code repeat} words accepted by the regex, one per line.
 *
 * Candidates are produced by feeding 1, 2, 3, ... to toWord; a candidate is
 * printed when toWord yields a non-empty word and checkString accepts it.
 *
 * The loop stops once {@code repeat} words have been printed, or when the
 * candidate counter overflows. It therefore terminates for a zero or negative
 * {@code repeat} and for a regex that accepts fewer than {@code repeat} words.
 *
 * @param language alphabet passed to toWord for every candidate
 * @param repeat   number of accepted words to print
 */
private static void generateAllPossibleWord(String language, int repeat) {
    int printed = 0;
    // candidate > 0 turns false when the counter wraps past Integer.MAX_VALUE.
    for (int candidate = 1; printed < repeat && candidate > 0; candidate++) {
        String generatedWord = toWord(candidate, language);
        // isEmpty() compares content; the former != "" compared references.
        if (!generatedWord.isEmpty() && checkString(generatedWord)) {
            System.out.println(generatedWord);
            printed++;
        }
    }
}
/**
 * Builds the alphabet string used for word generation: a leading "^"
 * followed by every entry of {@code input} that does not contain "#".
 *
 * @return "^" plus the concatenated symbols, in the set's iteration order
 */
private static String newLanguage() {
    StringBuilder alphabet = new StringBuilder("^");
    for (String symbol : input) {
        if (!symbol.contains("#")) {
            alphabet.append(symbol);
        }
    }
    return alphabet.toString();
}
/**
 * Converts a number into a word by writing it in base {@code alfabe.length()},
 * least significant digit first, and using each digit as an index into
 * {@code alfabe}.
 *
 * An empty string is returned when the word would contain '^', when
 * {@code sayi} is zero or negative, or when the alphabet has fewer than two
 * characters. That last case used to loop forever for length 1 and divide by
 * zero for length 0.
 *
 * @param sayi   number to convert
 * @param alfabe alphabet whose characters serve as digits
 * @return the generated word, or "" when no word is produced
 */
private static String toWord(int sayi, String alfabe) {
    int alfabeLen = alfabe.length();
    if (sayi <= 0 || alfabeLen < 2) {
        return "";
    }
    StringBuilder word = new StringBuilder();
    for (int rest = sayi; rest > 0; rest /= alfabeLen) {
        char digit = alfabe.charAt(rest % alfabeLen);
        if (digit == '^') {
            // A word containing '^' is discarded, so stop at the first one.
            return "";
        }
        word.append(digit);
    }
    return word.toString();
}
/**
 * Runs the word through a freshly built DFA and reports the outcome.
 *
 * Each character is first offered to the traversal with setCharacter. If it
 * is refused, "WRONG CHARACTER!" is printed and the program exits. Otherwise
 * the result of traverse() for that step is remembered. The value returned
 * is the result of the last step, so an empty word always yields
 * {@code false}.
 *
 * @param str word to test
 * @return the result of the last traverse() call, or {@code false} when
 *         {@code str} is empty
 */
private static boolean checkString(String str) {
    // Build the DFA from the syntax tree data and start at its start state.
    State startState = createDFA();
    DfaTraversal walker = new DfaTraversal(startState, input);
    boolean accepted = false;
    for (int pos = 0; pos < str.length(); pos++) {
        if (!walker.setCharacter(str.charAt(pos))) {
            // The traversal refused the character.
            System.out.println("WRONG CHARACTER!");
            System.exit(0);
        } else {
            accepted = walker.traverse();
        }
    }
    return accepted;
}
/**
 * Prompts for a regex and reads one line from the scanner.
 *
 * Every "+" in the line is replaced by "|" and a "#" is appended.
 *
 * @param in scanner to read from
 * @return the rewritten regex ending in "#"
 */
private static String getRegex(Scanner in) {
    System.out.print("Enter a regex: ");
    return in.nextLine().replace("+", "|") + "#";
}
/**
 * Scans the regex and fills {@code input} and {@code symbNum}.
 *
 * Both collections are replaced by fresh ones. Each character that is not an
 * operator is recorded as a one-character symbol. An operator character is
 * recorded only when the character before it is a backslash, and then as the
 * two-character string backslash + operator. Recorded symbols are numbered
 * 1, 2, 3, ... in order of appearance.
 *
 * @param regex regular expression to scan
 */
private static void getSymbols(String regex) {
    // Characters that carry operator meaning, e.g. '*' for closure.
    final String operators = "()*|&.\\[]+";
    input = new HashSet<>();
    symbNum = new HashMap<>();
    int nextPosition = 1;
    for (int i = 0; i < regex.length(); i++) {
        char current = regex.charAt(i);
        String symbol;
        if (operators.indexOf(current) < 0) {
            symbol = String.valueOf(current);
        } else if (i > 0 && regex.charAt(i - 1) == '\\') {
            // An operator right after a backslash counts as a plain symbol.
            symbol = "\\" + current;
        } else {
            continue;
        }
        input.add(symbol);
        symbNum.put(nextPosition++, symbol);
    }
}
/**
* Builds the DFA from the data held in root, followPos, input and symbNum,
* and returns its start state.
*
* DStates is cleared first, so every call rebuilds the automaton from scratch.
* Each state is named by a set of position numbers; two states are treated as
* the same when their names are equal.
*
* @return the start state, whose name is the firstpos set of the root
*/
private static State createDFA() {
int id = 0;
Set<Integer> firstpos_n0 = root.getFirstPos();
State q0 = new State(id++);
q0.addAllToName(firstpos_n0);
// A state is flagged accepting when its name contains position followPos.length.
// NOTE(review): presumably that is the position of the '#' end marker - confirm against SyntaxTree.
if (q0.getName().contains(followPos.length)) {
q0.setAccept();
}
DStates.clear();
DStates.add(q0);
while (true) {
boolean exit = true;
State s = null;
// Look for an unmarked state. There is no break, so the last unmarked
// state in the set's iteration order is the one that gets processed.
for (State state : DStates) {
if (!state.getIsMarked()) {
exit = false;
s = state;
}
}
// Every state is marked: the construction is complete.
if (exit) {
break;
}
// NOTE(review): s was just observed unmarked, so this guard looks unreachable - confirm.
if (s.getIsMarked()) {
continue;
}
s.setIsMarked(true); // mark the state so it is not processed again
Set<Integer> name = s.getName();
for (String a : input) {
// U collects the followpos of every position in s that carries symbol a.
Set<Integer> U = new HashSet<>();
for (int p : name) {
if (symbNum.get(p).equals(a)) {
// Positions are numbered from 1, the array from 0.
U.addAll(followPos[p - 1]);
}
}
// Reuse an existing state with the same name, if there is one.
boolean flag = false;
State tmp = null;
for (State state : DStates) {
if (state.getName().equals(U)) {
tmp = state;
flag = true;
break;
}
}
// Otherwise create a new state named U and register it in DStates.
if (!flag) {
State q = new State(id++);
q.addAllToName(U);
if (U.contains(followPos.length)) {
q.setAccept();
}
DStates.add(q);
tmp = q;
}
// Record the transition from s on symbol a.
s.addMove(a, tmp);
}
}
return q0;
}
/**
 * Prompts for the alphabet and reads one line from the scanner.
 *
 * The line is expected to list the letters separated by commas. The result
 * is "^" followed by the line with every comma removed.
 *
 * @param in scanner to read from
 * @return "^" plus the entered text without commas
 */
private static String getLanguage(Scanner in) {
    System.out.print("Enter a language: ");
    String line = in.nextLine();
    // Dropping the commas gives the same text as splitting on "," and
    // concatenating the parts.
    return "^" + line.replace(",", "");
}
/**
 * Prompts for the number of words to show and reads it from the scanner.
 *
 * @param in scanner to read from
 * @return the next integer token of the input
 */
private static int getRepeat(Scanner in) {
    System.out.print("How much do you want to see?: ");
    return in.nextInt();
}
}