- #1
Tom McCurdy
- 1,020
- 1
I wrote the following code: (well I wrote the markup function within it, the rest was written by my teacher)
and It works for most things, but I was trying to figure out while on some cases such as the boring.txt (please rename extention to .htm or .html if you test it) the program aborts. the program reads in a file then you give a vector of search terms and it finds them
Code:
// markup.cpp
//
// You must complete this program by properly implementing
// the routine called markup. You can implement other routines
// to help markup do its work.
// ==================================================================
#include <iostream>
#include <fstream>
#include <string>
#include <cctype>
#include <cstdlib>
#include <cassert>
#include <vector>
using namespace std;
// ==================================================================
// The characters used to mark the opening and closing of tags,
// the name of the body tag and the body closing tag (in lower case),
// and the markup strings used. Note that all of these are marked as
// constant so they cannot be changed by the executing algorithm.
// You do not need to use this, but are encouraged to use them, or
// something like them.
// ==================================================================
const char opentag ='<';
const char closetag ='>';
const string bodytag("body");
const string bodyclosetag("/body");
const string startMarkup("<B style=\"color:black;background-color:#A0FFFF\">");
const string endMarkup("</B>");
// ==================================================================
// Declarations of routines that are provided to you
// ==================================================================
void lowercase(string & s);
void printUsage(char * argv[]);
void parseInput(int argc, char * argv[], ifstream & in, ofstream & out,
vector<string> & searchTerms);
void printSearchWords(const vector<string> & searchTerms);
// ==================================================================
// Declarations of the routine that you must complete
// ==================================================================
void markup(istream & in, ostream & out, const vector<string> & searchTerms);
// ==================================================================
// You should declare other routines that you use in implementing
// your markup routine
// ==================================================================
// ==================================================================
// ==================================================================
// ===>> main <<=====================================================
// main takes arguments from the command line, which represent
// the name of the code, the name of the inputfile, and a list of
// words to search for. argc is the number of argumens provided, and
// argv is an array of c-strings that store the command line arguments
//
// Main is complete. You do not need to change anything in main.
int main(int argc, char * argv[])
{
// Make sure there are at least 3 words on the command line
if(argc < 3) printUsage(argv);
// Get the input and output streams, and the lowercase search words
ifstream in;
ofstream out;
vector<string> searchTerms;
parseInput(argc, argv, in, out, searchTerms);
// printSearchWords(searchTerms); // Used when testing parseInput
// Students must write this routine (see below)
markup(in, out, searchTerms);
// Close the input and output streams
in.close();
out.close();
return EXIT_SUCCESS;
}
// ===>> markup <<===================================================
// Read data from the in stream, find all of the words from the list
// of searchTerms, mark them bold on a cyan background, and write the
// marked up data to the outstream. On entry the input stream will
// be ready for use and on the first character, the output stream
// will also be ready for use. searchTerms will be a vector of words
// to search for; it will contain at least one word, and all the search
// words will be in lowercase.
// ==================================================================
void markup(istream & in, ostream & out, const vector<string> & searchTerms)
{
vector <string> theColors;
theColors.push_back("F40500");
theColors.push_back("9AFEF4");
theColors.push_back("FECC30");
theColors.push_back("6185FA");
theColors.push_back("DE5D77");
theColors.push_back("00FF00");
theColors.push_back("CC00CC");
theColors.push_back("FFCC00");
theColors.push_back("99CCFF");
theColors.push_back("CCCC99");
theColors.push_back("CCFFCC");
//vector <string>(F40500,9AFEF4,FECC30,6185FA,DE5D77);
string content, bodyContent, front, middle, end, bodyCopy, final, theTop, theBottom;
string eCode= "</B>";
vector<string> bodyTemp;
int gate=0, posStart,posMiddle, posEnd;
// This routine does not do anything useful yet.
// You will make it useful. At present it just
// copies the input stream to the output stream one
// character at a time
char c;
c = in.get(); // get one character from the input
while(not in.fail())
{
content.push_back(c);
//cout << c;
// input succeded, so write the charcter to output
c = in.get(); // get one character from the input
}
int pleaseStop = searchTerms.size();
for(int kk=0; kk<pleaseStop; kk++)
{
string sCode("<B style=\"color:black;background-color:#");
string s2Code=theColors[kk%11];
string s3Code="\">";
sCode= sCode+s2Code+s3Code;
bodyContent=content;
posStart = bodyContent.find("<body");
bodyContent=bodyContent.erase( 0,posStart + 1);
posMiddle = bodyContent.find(">");
bodyContent=bodyContent.erase( 0,posMiddle + 1);
posEnd = bodyContent.find("</body>");
bodyContent = bodyContent.erase( posEnd,bodyContent.size() );
theTop = content;
theTop = theTop.erase( posStart+posMiddle+2, theTop.size());
theBottom = content;
int posBody = bodyContent.find("</body>");
theBottom = "</body></html>";
bodyCopy=bodyContent;
int k=0;
int inTag=0;
char please;
char periodHere='.';
while(k < bodyCopy.size())
{
bodyCopy[k] = tolower(bodyCopy[k]);
please=bodyCopy[k];
//cout<<please<<endl;
if (please=='<')
{
inTag=1;
}
if (inTag==1)
{
bodyCopy[k]=periodHere;
}
if (please=='>')
{
inTag=0;
}
k = k + 1;
}
//cout << endl << endl << bodyContent <<endl << endl<<bodyCopy<<endl;
if (bodyCopy.npos == bodyCopy.find(searchTerms[kk]))
final=bodyContent;
while ( bodyCopy.npos != bodyCopy.find(searchTerms[kk]))
{
int pos = bodyCopy.find(searchTerms[kk]);
char x,y;
if (pos != 0 && pos !=bodyCopy.size() )
{
x=bodyCopy[pos-1];
y=bodyCopy[pos + searchTerms[kk].size()];
}
else if(pos==0)
{
x=0;
y=bodyCopy[pos+searchTerms[kk].size()];
}
else
{
y=0;
x=bodyCopy[pos-1];
}
front=front.erase();
middle=middle.erase();
end=end.erase();
int j=0;
while (j < pos)
{
front.push_back(bodyContent[j]);
j++;
}
j=pos;
while (j<pos+searchTerms[kk].size())
{
middle.push_back(bodyContent[j]);
j++;
}
j=pos+searchTerms[kk].size();
while (j<bodyContent.size())
{
end.push_back(bodyContent[j]);
j++;
}
if ( (isalnum(x)==0) && (isalnum(y)==0) )
{
final = final + front + sCode + middle + eCode;
}
else
{
final = final + front + middle;
}
bodyContent=bodyContent.erase( 0,pos + searchTerms[kk].size());
bodyCopy=bodyCopy.erase( 0,pos + searchTerms[kk].size());
}
//cout<<endl<<"theTop: "<<theTop<<endl;
//cout<<endl<<"final: "<<final<<endl;
//cout<<endl<<"end: "<<end<<endl;
//cout<<endl<<"theBottom: "<<theBottom;
final = theTop + final + end + theBottom;
//cout<<endl<<"final: "<<final<<endl;
content=final;
bodyContent=bodyContent.erase();
front=front.erase();
middle=middle.erase();
end=end.erase();
bodyCopy=bodyCopy.erase();
final=final.erase();
theTop=theTop.erase();
theBottom=theBottom.erase();
}
//cout<<endl<<final;
out<<content;
}
// ===>> lowercase <<================================================
// Convert string to lowercase, in place
// ==================================================================
void lowercase(string & s)
{
unsigned int i = 0;
while(i < s.size())
{
s[i] = tolower(s[i]);
i = i + 1;
}
}
// ===>> printUsage <<===============================================
// Print a usage message
// ==================================================================
void printUsage(char * argv[])
{
cerr << "Usage: " << argv[0] << " inputfile word1 [word2 word3 ... ]\n";
exit(EXIT_FAILURE);
}
// ===>> parseInput <<===============================================
// Get the input file name, open the input and output files, and get
// the search words. argc is the number of command line arguments,
// and argv is an array of those arguments as c-strings. On exit in
// and out will be connected to files and ready for use, and
// searchTerms will be contain a list of strings containing words
// to search for, converted to lowercase letters.
//
// Requires that argc >= 3.
// ==================================================================
void parseInput(int argc, char * argv[],
ifstream & in, ofstream & out,
vector<string> & searchTerms)
{
// attach into the input file
in.open(argv[1]);
if(in.fail())
{
cerr << "Input file " << argv[1] << " did not open\n";
exit(EXIT_FAILURE);
}
// attach out to the output file
string outname = "marked";
outname.append(argv[1]);
out.open(outname.c_str());
if(out.fail())
{
cerr << "Output file " << outname << " did not open\n";
exit(EXIT_FAILURE);
}
// get the first search word and convert it to lowercase
string s(argv[2]);
lowercase(s);
searchTerms.push_back(s);
// get any additional search words and convert them to lowercase
for(unsigned int i = 3; i < argc; i = i + 1)
{
s = argv[i];
lowercase(s);
searchTerms.push_back(s);
}
}
// ===>> printSearchWords <<=========================================
// Print the words in searchTerms to standard output. This was used
// for testing, but not in the final code.
// ==================================================================
void printSearchWords(const vector<string> & searchTerms)
{
cout << "The search words are: \n";
unsigned int i = 0;
while(i < searchTerms.size())
{
cout << " " << searchTerms[i] << endl;
i = i + 1;
}
}
and It works for most things, but I was trying to figure out while on some cases such as the boring.txt (please rename extention to .htm or .html if you test it) the program aborts. the program reads in a file then you give a vector of search terms and it finds them