- #1
Jamin2112
- 986
- 12
Any ideas on how I can clean up the following?
For reference:
and
Code:
void HtmlParser::parseTag(std::string::const_iterator tagBegin, std::string::const_iterator tagEnd, node & thisNode)
{
    /*
     Extracts the element type, the class list and the id from one opening tag.

     tagBegin: iterator to the '<' character that starts the tag
     tagEnd:   iterator to the '>' character that ends the tag
     thisNode: node whose element_type, class_set and iden fields are filled in

     E.g. if the string between tagBegin and tagEnd is
         "div class= 'class1 class2 class3' id = 'myId' onclick = 'myFunction()'"
     then thisNode.element_type becomes "div"; "class1", "class2" and "class3"
     are added to thisNode.class_set; and thisNode.iden becomes "myId".

     Throws a C string (const char *) if the text directly after '<' is not
     accepted by _elementReg, or if the range is empty.

     Procedure:
     (1) Take the run of characters after '<' up to the first whitespace (or
         tagEnd). If _elementReg accepts it, store it as the element type;
         otherwise throw.
     (2) For every attribute name/value pair that _attrReg finds after the
         element type:
         (i)  if the name is "class" (case-insensitive), add every
              whitespace-separated token of the value to thisNode.class_set;
         (ii) if the name is "id" (case-insensitive), set thisNode.iden equal
              to the value.
         All other attributes are ignored.
    */

    // (1)
    // An empty range has no '<' to skip; stepping past tagEnd would be undefined.
    if (tagBegin == tagEnd)
        throw "Could not process element type.";

    const std::string::const_iterator nameBegin = tagBegin + 1;
    // Stop at any whitespace (space, tab, newline), not just ' '.
    const std::string::const_iterator nameEnd = std::find_if(
        nameBegin, tagEnd,
        [](char c) { return ::isspace(static_cast<unsigned char>(c)) != 0; });

    const std::string elementType(nameBegin, nameEnd);
    if (!std::regex_match(elementType, _elementReg))
        throw "Could not process element type.";
    thisNode.element_type = elementType;

    // (2)
    const std::sregex_iterator attrEnd;
    for (std::sregex_iterator attr(nameEnd, tagEnd, _attrReg); attr != attrEnd; ++attr)
    {
        const std::smatch & match = *attr;

        // Capture groups of _attrReg: 1 = attribute name, 2 = opening quote
        // character, 3 = attribute value. The value is group 3, not group 2.
        std::string attrName = match[1].str();
        // Go through unsigned char: tolower on a negative char value is undefined.
        std::transform(attrName.begin(), attrName.end(), attrName.begin(),
                       [](unsigned char c) { return static_cast<char>(::tolower(c)); });
        const std::string attrValue = match[3].str();

        // (2i)
        if (attrName == "class")
        {
            // operator>> splits on any whitespace and never yields empty tokens,
            // so repeated spaces do not put "" into the class set.
            std::stringstream ss(attrValue);
            std::string thisClass;
            while (ss >> thisClass)
                thisNode.class_set.insert(thisClass);
        }
        // (2ii)
        else if (attrName == "id")
        {
            thisNode.iden = attrValue;
        }
    }
}
Code:
// Accepts a whole element name: one or more letters, digits or hyphens.
// The '+' is required because parseTag checks the name with std::regex_match,
// which must match the entire string; without it only one-character names
// such as "p" pass and "div" is rejected.
const std::regex HtmlParser::_elementReg("[A-Za-z0-9\\-]+");
// Finds one quoted attribute: name, optional whitespace around '=', then a
// value delimited by matching single or double quotes.
// Capture groups: 1 = attribute name, 2 = quote character, 3 = attribute value.
const std::regex HtmlParser::_attrReg("([A-Za-z0-9\\-]+)\\s*=\\s*(['\"])(.*?)\\2");
Code:
// One element of the parsed document tree.
struct node
{
    // Tag name of the element, e.g. "div".
    std::string element_type;

    // Every class name listed in the element's class attribute.
    std::set<std::string> class_set;

    // Value of the element's id attribute; empty when none was given.
    std::string iden;

    // Pointers to child nodes. NOTE(review): who owns and frees these is not
    // visible here - confirm against the code that builds the tree.
    std::set<node*> children;
};
Last edited: