00001
00002 #include <iostream>
00003
00004 #include "SDXXMLStreamReader.h"
00005 #include "SDXContentHandler.h"
00006
00007 using namespace std;
00008 using namespace SDX;
00009 using namespace SDX::Formats;
00010
00011 XmlStreamReader::XmlStreamReader(istream* iStream, ContentHandler* contentHandler) :
00012 StreamReader(iStream, contentHandler),
00013 m_insertTextNodeOnText(false),
00014 m_trimMode(Lines),
00015 m_sendMetaTag(false)
00016 {
00017
00018 }
00019
00020 void XmlStreamReader::setInsertTextNodeOnText(bool v){
00021 m_insertTextNodeOnText = v;
00022 }
00023
00024 void XmlStreamReader::setTrimMode(TrimMode trimMode){
00025 m_trimMode = trimMode;
00026 }
00027
00028 void XmlStreamReader::setParseMetaTag(bool v){
00029 m_sendMetaTag = v;
00030 }
00031
00032 void XmlStreamReader::startProcessing(){
00033 m_inTag = m_inMetaTag = m_hasMetaTag = m_inComment = m_inCDATA = false;
00034 }
00035
00036 bool XmlStreamReader::processLine(string& line){
00037 size_t lastNonWhitespace = line.find_last_not_of(" \t\n\r");
00038 if(lastNonWhitespace != string::npos)
00039 line.erase(lastNonWhitespace + 1);
00040
00041 while(!line.empty()){
00042 if(!stripWhitespace(line))
00043 return true;
00044
00045 char firstChar = line.at(0);
00046 if((firstChar == '<' && !m_inCDATA) || m_inComment){
00047 if(line.length() > 1 && line.at(1) == '?'){
00048 size_t firstNonName = line.find_first_of(" \t\n\r");
00049 if(firstNonName != string::npos)
00050 line.erase(0, firstNonName + 1);
00051 else
00052 break;
00053
00054 m_inTag = m_inMetaTag = true;
00055 if(m_sendMetaTag)
00056 m_contentHandler->startNode("*XML_METADATA*");
00057 m_nodeStack.push("*XML_METADATA*");
00058 } else if(m_inComment || (line.length() > 3 && line.substr(1, 3) == "!--")){
00059 size_t endOfComment = line.find("-->");
00060 if(endOfComment != string::npos){
00061 line.erase(0, endOfComment + 3);
00062 m_inComment = false;
00063 } else {
00064 m_inComment = true;
00065 break;
00066 }
00067 } else if(line.length() > 8 && line.substr(1, 8) == "![CDATA["){
00068 line.erase(0, 9);
00069 m_inCDATA = true;
00070 } else {
00071 if(m_nodeStack.size() > 1 && !m_curText.empty()){
00072 m_contentHandler->writeAttribute("", m_curText);
00073 m_curText.clear();
00074 }
00075
00076 line.erase(0, 1);
00077
00078 size_t firstNonName = line.find_first_of(" \t\n\r/?>");
00079 if(firstNonName == string::npos){
00080 if(line.at(0) == '/'){
00081 setError("Unfinished closing tag");
00082 return false;
00083 }
00084
00085 if(!m_nodeStack.empty())
00086 m_contentHandler->startNode(line);
00087
00088 m_nodeStack.push(line);
00089 m_inTag = true;
00090 break;
00091 } else if(line.at(0) == '/') {
00092 size_t firstNonName = line.find_first_of(">");
00093 if(firstNonName == string::npos){
00094 setError("Unfinished closing tag");
00095 return false;
00096 }
00097
00098 string nodeName = line.substr(1, firstNonName - 1);
00099 if(nodeName != m_nodeStack.top()){
00100 setError("Incorrect nesting");
00101 return false;
00102 } else {
00103 if(m_nodeStack.size() > 1)
00104 m_contentHandler->endNode();
00105 m_nodeStack.pop();
00106 }
00107
00108 line.erase(0, firstNonName + 1);
00109 } else {
00110 if(!m_nodeStack.empty())
00111 m_contentHandler->startNode(line.substr(0, firstNonName));
00112 m_nodeStack.push(line.substr(0, firstNonName));
00113
00114 line.erase(0, firstNonName);
00115 m_inTag = true;
00116 }
00117 }
00118 } else if(firstChar == '>' && m_inTag){
00119 line.erase(0, 1);
00120 m_inTag = false;
00121 } else if((m_inTag && firstChar == '/') || (m_inMetaTag && firstChar == '?')){
00122 line.erase(0, 1);
00123
00124 if(!stripWhitespace(line)){
00125 setError(string("Expected > after ") + firstChar);
00126 return false;
00127 }
00128
00129 if(line.empty() || line.at(0) != '>'){
00130 setError(string("Expected > after ") + firstChar);
00131 return false;
00132 }
00133 line.erase(0, 1);
00134
00135 if(m_nodeStack.size() > 1 || (m_inMetaTag && m_sendMetaTag))
00136 m_contentHandler->endNode();
00137 m_nodeStack.pop();
00138 m_inTag = m_inMetaTag = false;
00139 } else if(m_inTag) {
00140 size_t firstNonName = line.find_first_of(" \t=<>/?");
00141 if(firstNonName == string::npos){
00142 setError("Incomplete attribute");
00143 return false;
00144 }
00145 if(firstNonName == 0){
00146 setError("Incomplete attribute");
00147 return false;
00148 }
00149
00150 string attributeName = line.substr(0, firstNonName);
00151 line.erase(0, firstNonName);
00152
00153 if(!stripWhitespace(line) || line.at(0) != '='){
00154 setError("Incomplete attribute");
00155 return false;
00156 }
00157 line.erase(0, 1);
00158
00159 size_t stringEnd = findNextQuote(line);
00160 if(stringEnd == string::npos){
00161 setError("Incomplete attribute");
00162 return false;
00163 }
00164
00165 string attributeValue = line.substr(1, stringEnd - 1);
00166 line.erase(0, stringEnd + 1);
00167
00168 if(!m_inMetaTag ^ m_sendMetaTag)
00169 m_contentHandler->writeAttribute(attributeName, attributeValue);
00170 } else {
00171 if(m_curText.empty()){
00172 if(m_insertTextNodeOnText){
00173 m_contentHandler->startNode("*TEXT*");
00174 m_contentHandler->endNode();
00175 }
00176 } else {
00177 if(m_trimMode == Lines)
00178 m_curText += '\n';
00179 else
00180 m_curText += ' ';
00181 }
00182
00183 size_t textEnd = line.find(m_inCDATA ? "]]>" : "<");
00184 if(textEnd == string::npos){
00185 m_curText += line;
00186 break;
00187 } else {
00188 m_curText += line.substr(0, textEnd);
00189
00190 if(m_inCDATA){
00191 line.erase(0, textEnd + 3);
00192 m_inCDATA = false;
00193 } else
00194 line.erase(0, textEnd);
00195 }
00196 }
00197 }
00198
00199 return true;
00200 }
00201
00202 bool XmlStreamReader::stripWhitespace(string& line){
00203 size_t firstNonWhitespace = line.find_first_not_of(" \t\n\r");
00204 if(firstNonWhitespace == string::npos)
00205 return false;
00206
00207 line.erase(0, firstNonWhitespace);
00208 return true;
00209 }
00210
00211 size_t XmlStreamReader::findNextQuote(string& line){
00212 if(line.at(0) != '"' && line.at(0) != '\'')
00213 return string::npos;
00214
00215 size_t nextQuote = line.find_first_of(line.at(0), 1);
00216 return nextQuote;
00217 }