-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFile_indexing.cpp
More file actions
145 lines (131 loc) · 3.16 KB
/
File_indexing.cpp
File metadata and controls
145 lines (131 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
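/* BST-based file indexing program.
   Reads a list of file paths from flist.txt (one path per line), records every
   lower-cased word of each listed file together with the file name and line
   number in a binary search tree, and writes the resulting index to the file
   named by the first command-line argument.
*/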
#include<cctype>
#include<cstddef>
#include<fstream>
#include<iostream>
#include<string>
#include"BST.h"
using namespace std;
/* One occurrence of a word: the file it was seen in and the line number.
   Occurrences of the same word are chained through `next`. */
struct linked_list {
string file_name;
int line_no;
linked_list* next;
};

/* Index entry for a single word.
   Holds the word and a singly linked list of its occurrences in insertion
   order. Entries compare and print by the word alone.

   The entry owns its occurrence nodes: they are released by the destructor,
   and copying an entry duplicates the list so two entries never share nodes.
   Pointers returned by get_occlist() stay owned by the entry and must not be
   deleted by the caller. */
class TermRec
{
friend ostream& operator<<( ostream&, const TermRec& );
private:
string Word;
linked_list *occ_list;   // first occurrence, NULL while the list is empty
linked_list *occ_tail;   // last occurrence, NULL while the list is empty
void copy_occurrences( const TermRec& );
void clear();
public:
TermRec () : occ_list(NULL), occ_tail(NULL) {}
TermRec (const string& x) : Word(x), occ_list(NULL), occ_tail(NULL) {}
TermRec (const TermRec& other);
TermRec& operator=( const TermRec& other );
~TermRec ();
string get_word() const;
void Insert(const string& fname , int l_no);
linked_list* get_occlist() const;
bool operator==( const TermRec& ) const;
bool operator<( const TermRec& ) const;
bool operator>( const TermRec& ) const;
};

/* Copy constructor: duplicates the word and every occurrence node. */
TermRec :: TermRec( const TermRec& other )
    : Word(other.Word), occ_list(NULL), occ_tail(NULL)
{
    try {
        copy_occurrences(other);
    } catch (...) {
        clear();   // the destructor does not run for a partially built object
        throw;
    }
}

/* Copy assignment: builds the copy first, then swaps it in, so *this is left
   unchanged if the copy fails. The previous list is released with `copy`. */
TermRec& TermRec :: operator=( const TermRec& other )
{
    if (this != &other) {
        TermRec copy(other);
        Word.swap(copy.Word);
        linked_list* old_head = occ_list;
        linked_list* old_tail = occ_tail;
        occ_list = copy.occ_list;
        occ_tail = copy.occ_tail;
        copy.occ_list = old_head;
        copy.occ_tail = old_tail;
    }
    return *this;
}

/* Releases every occurrence node owned by this entry. */
TermRec :: ~TermRec()
{
    clear();
}

/* Appends a copy of each occurrence of `other` to this entry, in order. */
void TermRec :: copy_occurrences( const TermRec& other )
{
    for (const linked_list* src = other.occ_list; src; src = src->next)
        Insert(src->file_name, src->line_no);
}

/* Deletes all occurrence nodes and resets the list to empty. */
void TermRec :: clear()
{
    while (occ_list) {
        linked_list* following = occ_list->next;
        delete occ_list;
        occ_list = following;
    }
    occ_tail = NULL;
}

/* Returns the indexed word. */
string TermRec :: get_word() const
{
    return Word;
}

/* Appends the occurrence (fname, l_no) to the end of the occurrence list. */
void TermRec :: Insert(const string& fname , int l_no)
{
    linked_list* node = new linked_list;
    node->file_name = fname;
    node->line_no = l_no;
    node->next = NULL;
    if (occ_list == NULL) {
        occ_list = node;
    } else {
        // Normally occ_tail is already the last node; the loop only advances
        // if nodes were linked on through the pointer from get_occlist().
        while (occ_tail->next)
            occ_tail = occ_tail->next;
        occ_tail->next = node;
    }
    occ_tail = node;
}

/* Returns the first occurrence node, or NULL when nothing has been inserted. */
linked_list* TermRec :: get_occlist() const
{
    return occ_list;
}

/* Entries are equal when their words are equal. */
bool TermRec :: operator==( const TermRec &B ) const
{
    return Word == B.Word;
}

/* Orders entries by comparing their words with string's operator<. */
bool TermRec :: operator<( const TermRec &B ) const
{
    return Word < B.Word;
}

/* Orders entries by comparing their words with string's operator>. */
bool TermRec :: operator>( const TermRec &B ) const
{
    return Word > B.Word;
}

/* Streams only the word; occurrences are not printed. */
ostream& operator<<( ostream& output, const TermRec& B)
{
    output << B.Word;
    return output;
}
/* Writes the subtree rooted at p to fp in preorder (node, left subtree, right
   subtree). Each node produces one line: the word, a space, then one
   "filename-line " pair per occurrence. A NULL p writes nothing.

   Only the left subtree is handled by recursion; the right subtree is handled
   by the loop, which emits the same order with less stack depth on
   right-leaning trees. Lines end with '\n' instead of endl so the stream is
   not flushed after every word. */
void write_to_file ( TreeNode<TermRec> *p , ofstream &fp )
{
    while (p) {
        fp << p->get_data()->get_word() << " ";
        for (const linked_list *occ = p->get_data()->get_occlist(); occ; occ = occ->next)
            fp << occ->file_name << "-" << occ->line_no << " ";   // 'filename-line', pairs separated by ' '
        fp << '\n';
        write_to_file ( p->get_lchild() , fp );
        p = p->get_rchild();
    }
}
int main(int argc , char* argv[])
{
BST<TermRec> T;
ifstream F("flist.txt") , file ;
ofstream fp(argv[1]);
if (!F.is_open()) {
cout << "\nError opening file flist.txt , Aborting!!";
return -1;}
string file_to_be_indexed , line , word , file_name;
int line_no , i , start;
TermRec *Node , *word_obj;
while (getline( F , file_to_be_indexed )) {
file.open(file_to_be_indexed.c_str());
if (file.is_open()) {
start = 0;
while ( (i = file_to_be_indexed.find("/",start)) != std::string::npos ) //finding file name
start = i+1;
file_name = file_to_be_indexed.substr(start);
line_no = 1;
while (getline(file , line)) {
start = 0;
line.append(" ");
while ( (i = line.find(" ",start)) != std::string::npos) {
word = line.substr(start , i-start);
for ( int j=0 ; j < word.length() ; j++)
word[j] = tolower(word[j]);
word_obj = new TermRec( word );
Node = T.Insert(word_obj);
Node->Insert(file_name , line_no);
delete word_obj;
start = i+1;
while (line[start] == ' ') //to accomodate more than one space between words
start++;}
line_no++;}
file.close();}
else
cout << "\nError opening file " << file_to_be_indexed; }
write_to_file(T.get_root() , fp);
return 0;
}