--- /dev/null
+/***************************************************************************
+ copyright : (C) 2005 by Inge Wallin
+ email : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "kalziumdataobject.h"
+#include "moleculeparser.h"
+
+
+// ================================================================
+// class ElementCountMap
+
+
+
+// Create a map that holds no element counts yet.
+ElementCountMap::ElementCountMap()
+    : m_map()
+{
+    // Entries are appended later by add().
+}
+
+
+// Destroy the map.
+//
+// NOTE(review): the ElementCount objects that add() allocates with new
+// are not deleted here (nor by clear()), so they outlive the map --
+// confirm whether the map is meant to own them.
+ElementCountMap::~ElementCountMap()
+{
+}
+
+
+// Find the entry that counts _element.
+//
+// Entries are compared by Element pointer. Returns 0 when no entry
+// for the element exists.
+ElementCount *
+ElementCountMap::search(Element *_element)
+{
+    for (int i = 0; i < m_map.size(); ++i) {
+        ElementCount *candidate = m_map.at(i);
+        if (candidate->element() == _element)
+            return candidate;
+    }
+
+    return 0;
+}
+
+
+// Merge all entries of _map into this map: counts of elements already
+// present are increased, other elements get a new entry.
+void
+ElementCountMap::add(ElementCountMap &_map)
+{
+    const QList<ElementCount *> &entries = _map.m_map;
+
+    for (int i = 0; i < entries.size(); ++i) {
+        const ElementCount *entry = entries.at(i);
+        add(entry->element(), entry->count());
+    }
+}
+
+
+// Add _count occurrences of _element, creating a new entry when the
+// element is not in the map yet.
+void
+ElementCountMap::add(Element *_element, int _count)
+{
+    if (ElementCount *existing = search(_element)) {
+        existing->add(_count);
+        return;
+    }
+
+    m_map.append(new ElementCount(_element, _count));
+}
+
+
+// Multiply the count of every entry by _factor.
+void
+ElementCountMap::multiply(int _factor)
+{
+    for (Iterator entry = begin(); entry != end(); ++entry)
+        (*entry)->m_count *= _factor;
+}
+
+
+// ================================================================
+// class MoleculeParser
+
+
+// Create a parser with no input string.
+//
+// The error flag and the current-element pointer are given defined
+// values so that they can be read safely before the first parse.
+MoleculeParser::MoleculeParser()
+    : Parser(),
+      m_error(false),
+      m_elementVal(0)
+{
+}
+
+
+// Create a parser positioned at the start of _str.
+//
+// Parser(_str) calls start(), and the getNextToken() call made from
+// there runs while the base class is still being constructed, so it
+// resolves to Parser::getNextToken(). Element symbols are therefore
+// only recognized once start() is called again, as weight() does.
+//
+// The error flag and the current-element pointer are given defined
+// values so that they can be read safely before the first parse.
+MoleculeParser::MoleculeParser(const QString& _str)
+    : Parser(_str),
+      m_error(false),
+      m_elementVal(0)
+{
+}
+
+
+// Destroy the parser.
+MoleculeParser::~MoleculeParser()
+{
+    // Nothing to do here; the Parser destructor is invoked automatically.
+}
+
+
+// ----------------------------------------------------------------
+// public methods
+
+
+// Parse _moleculeString and compute the mass and composition of the
+// molecule it describes.
+//
+// *_resultMass receives the summed mass and *_resultMap the element
+// counts of all terms that could be parsed. Returns true only when the
+// whole string was consumed and no element lookup failed.
+
+bool
+MoleculeParser::weight(QString _moleculeString,
+                       double *_resultMass,
+                       ElementCountMap *_resultMap)
+{
+    // Forget the outcome of any earlier parse.
+    m_error = false;
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+    // Restart the tokenizer on the new string and consume all terms.
+    start(_moleculeString);
+    parseSubmolecule(_resultMass, _resultMap);
+
+    // Leftover tokens or a failed element lookup both mean failure.
+    const bool consumedEverything = (nextToken() == -1);
+    return consumedEverything && !m_error;
+}
+
+
+// ----------------------------------------------------------------
+// helper methods for the public methods
+
+
+// Parse a submolecule, i.e. a sequence of zero or more terms.
+//
+// The masses of the terms are summed into *_resultMass and their
+// compositions merged into *_resultMap. Parsing stops at the first
+// token that does not start a term. Always returns true.
+
+bool
+MoleculeParser::parseSubmolecule(double *_resultMass,
+                                 ElementCountMap *_resultMap)
+{
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+    double termMass = 0.0;
+    ElementCountMap termMap;
+
+    for (;;) {
+        if (!parseTerm(&termMass, &termMap))
+            break;
+
+        // Fold the term just parsed into the running totals.
+        *_resultMass += termMass;
+        _resultMap->add(termMap);
+    }
+
+    return true;
+}
+
+
+// Parse a term within the molecule, i.e. a single atom or a
+// submolecule within parentheses, followed by an optional number.
+// Examples: Bk, Mn2, (COOH)2
+//
+// Return true if a term was parsed, otherwise return false.
+//
+// If correct, the mass of the term is returned in *_resultMass, and
+// the flattened composition of the term in *_resultMap.
+//
+// Returning false because the current token cannot start a term is
+// the normal way for a list of terms to end and is not an error. An
+// opening parenthesis without its matching ")" is a real syntax error
+// and additionally sets m_error, so that weight() reports failure
+// even when the input ends right after the unterminated group.
+
+bool
+MoleculeParser::parseTerm(double *_resultMass,
+                          ElementCountMap *_resultMap)
+{
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+    if (nextToken() == ELEMENT_TOKEN) {
+        // A single atom: take its mass from the element data.
+        *_resultMass = m_elementVal->dataAsVariant( ChemicalDataObject::mass ).toDouble();
+        _resultMap->add(m_elementVal, 1);
+
+        getNextToken();
+    }
+    else if (nextToken() == '(') {
+        // A submolecule within parentheses.
+        getNextToken();
+        parseSubmolecule(_resultMass, _resultMap);
+
+        // Must end in a ")".
+        if (nextToken() != ')') {
+            m_error = true;
+            return false;
+        }
+        getNextToken();
+    }
+    else {
+        // Neither an element nor a list within ().
+        return false;
+    }
+
+    // Optional number: repeats the atom or group that many times.
+    if (nextToken() == INT_TOKEN) {
+        *_resultMass *= intVal();
+        _resultMap->multiply(intVal());
+
+        getNextToken();
+    }
+
+    kdDebug() << "Weight of term = " << *_resultMass << endl;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+// protected methods
+
+
+// Extend Parser::getNextToken with elements.
+//
+// An element symbol is an upper-case letter optionally followed by one
+// lower-case letter. When such a symbol is found and is known, the
+// token is ELEMENT_TOKEN and m_elementVal points to the element; when
+// it is unknown the token is -1 (lookupElement() also sets m_error).
+// Everything else is handed to Parser::getNextToken().
+//
+// Whitespace is skipped first, as the base tokenizer does for numbers
+// and single characters, so that "H O" is read the same way as "HO".
+
+int
+MoleculeParser::getNextToken()
+{
+    skipWhitespace();
+
+    // Not the start of an element symbol: let the base class handle it.
+    const int first = nextChar();
+    if (first < 'A' || first > 'Z')
+        return Parser::getNextToken();
+
+    QString elementName;
+    elementName = char(first);
+    getNextChar();
+
+    const int second = nextChar();
+    if ('a' <= second && second <= 'z') {
+        elementName.append(char(second));
+        getNextChar();
+    }
+
+    // Look up the element from the name.
+    m_elementVal = lookupElement(elementName);
+    if (m_elementVal)
+        m_nextToken = ELEMENT_TOKEN;
+    else
+        m_nextToken = -1;
+
+    return m_nextToken;
+}
+
+
+// ----------------------------------------------------------------
+// private methods
+
+
+// Find the element whose symbol equals _name in the element list of
+// KalziumDataObject.
+//
+// Returns the element, or NULL when there is none; in the latter case
+// m_error is set to true as well.
+Element *
+MoleculeParser::lookupElement( const QString& _name )
+{
+    const QList<Element*> candidates = KalziumDataObject::instance()->ElementList;
+
+    for (int i = 0; i < candidates.size(); ++i) {
+        Element *candidate = candidates.at(i);
+        if ( candidate->dataAsVariant(ChemicalDataObject::symbol) == _name ) {
+            kdDebug() << "Found element " << _name << endl;
+            return candidate;
+        }
+    }
+
+    // Unknown symbol: record the parse error for weight().
+    m_error = true;
+
+    kdDebug() << k_funcinfo << "no such element, parsing error!: " << _name << endl;
+    return NULL;
+}
--- /dev/null
+/***************************************************************************
+ copyright : (C) 2005 by Inge Wallin
+ email : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#ifndef MOLECULEPARSER_H
+#define MOLECULEPARSER_H
+
+#include "element.h"
+#include "parser.h"
+
+#include <qmap.h>
+#include <qlist.h>
+
+
+/**
+ * @class ElementCount
+ *
+ * Pairs an element with the number of times it occurs.
+ *
+ * The element is referenced through a plain pointer; this class never
+ * deletes it.
+ */
+class ElementCount {
+ public:
+    /** Count @p _count occurrences of @p _element. */
+    ElementCount(Element *_element, int _count)
+        : m_element(_element),
+          m_count(_count)
+    {
+    }
+
+    /** Start counting @p _element at zero. */
+    ElementCount(Element *_element)
+        : m_element(_element),
+          m_count(0)
+    {
+    }
+
+    // Defined inline: the destructor used to be declared without any
+    // definition, which fails to link as soon as an ElementCount is
+    // destroyed.
+    ~ElementCount() {}
+
+    /** @return the element being counted */
+    Element *element() const { return m_element; }
+    /** @return the current count */
+    int count() const { return m_count; }
+    /** Increase the count by @p _count. */
+    void add(int _count) { m_count += _count; }
+    /** Multiply the count by @p _factor. */
+    void multiply(int _factor) { m_count *= _factor; }
+
+    Element *m_element;
+    int m_count;
+};
+
+
+/**
+ * @class ElementCountMap
+ *
+ * A collection of ElementCount entries, stored in a list and looked up
+ * by Element pointer.
+ *
+ * NOTE(review): add() allocates the entries with new, while clear()
+ * and the destructor only drop the pointers -- confirm who is supposed
+ * to delete the entries.
+ */
+class ElementCountMap {
+ public:
+ ElementCountMap();
+ ~ElementCountMap();
+
+ // Remove all entries from the list (the entries are not deleted).
+ void clear() { m_map.clear(); }
+
+ // Return the entry for _element, or 0 if there is none.
+ ElementCount *search(Element *_element);
+ // Merge all entries of _map into this map.
+ void add(ElementCountMap &_map);
+ // Add _count to the entry for _element, creating the entry if needed.
+ void add(Element *_element, int _count);
+ // Multiply the count of every entry by _factor.
+ void multiply(int _factor);
+
+ // Iteration over the stored entries.
+ typedef QList<ElementCount*>::Iterator Iterator;
+ Iterator begin() { return m_map.begin(); }
+ Iterator end() { return m_map.end(); }
+
+ private:
+ QList<ElementCount*> m_map;
+};
+
+
+
+/**
+ * @class MoleculeParser
+ *
+ * Parse molecule formulas.
+ *
+ * Usage:
+ * @code
+ * MoleculeParser parser;
+ * QString chemical_formula = "C2H5OH";
+ * double weight;
+ * ElementCountMap composition;
+ *
+ * if (parser.weight(chemical_formula, &weight, &composition))
+ * cout << "Weight of " << chemical_formula << " = " << weight << ".\n";
+ * else
+ * cout << "Parse error\n";
+ * @endcode
+ *
+ * @author Inge Wallin
+ */
+class MoleculeParser : public Parser {
+
+public:
+ // Token value reported when the next token is an element symbol.
+ static const int ELEMENT_TOKEN = 300;
+
+ MoleculeParser();
+ MoleculeParser( const QString& _str);
+ virtual ~MoleculeParser();
+
+ /**
+ * Try to parse the molecule @p _moleculeString and get the weight of it.
+ * The calculated weight is stored in @p _resultMass and the number of
+ * atoms of each element in @p _resultMap.
+ *
+ * @return whether the parsing was successful or not
+ */
+ bool weight(QString _moleculeString,
+ double *_resultMass,
+ ElementCountMap *_resultMap);
+ private:
+ // Helper functions
+
+ // Parse a list of terms, accumulating their mass and composition.
+ bool parseSubmolecule(double *_resultMass,
+ ElementCountMap *_resultMap);
+ // Parse one atom or parenthesized group with an optional count.
+ bool parseTerm(double *_resultMass,
+ ElementCountMap *_resultMap);
+
+ // Return the element with symbol _name, or NULL (and set m_error).
+ Element *lookupElement( const QString& _name );
+
+ // NOTE(review): not referenced by the implementation visible in
+ // moleculeparser.cpp -- confirm whether it is still needed.
+ QMap<Element*, int> m_elementMap;
+
+ // If this boolean is "true" the parser found an error.
+ bool m_error;
+
+protected:
+
+ /**
+ * Extends the standard tokenizer in Parser::getNextToken().
+ */
+ virtual int getNextToken();
+
+private:
+ Element *m_elementVal; // Valid if m_nextToken == ELEMENT_TOKEN
+};
+
+#endif
--- /dev/null
+/***************************************************************************
+ copyright : (C) 2005 by Inge Wallin
+ email : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "parser.h"
+
+
+// Create a parser without any input.
+//
+// All scalar members get defined values up front; start() does not
+// touch the token values, so intVal() and floatVal() would otherwise
+// return indeterminate data until the first number is parsed.
+Parser::Parser()
+    : m_index(-1),
+      m_nextChar(-1),
+      m_nextToken(-1),
+      m_intVal(0),
+      m_floatVal(0.0)
+{
+    start(QString::null);
+}
+
+
+// Create a parser and start parsing _str.
+//
+// All scalar members get defined values up front; start() does not
+// touch the token values, so intVal() and floatVal() would otherwise
+// return indeterminate data until the first number is parsed.
+//
+// The getNextToken() call made by start() runs during construction and
+// therefore always uses Parser::getNextToken(), never an override.
+Parser::Parser(const QString& _str)
+    : m_index(-1),
+      m_nextChar(-1),
+      m_nextToken(-1),
+      m_intVal(0),
+      m_floatVal(0.0)
+{
+    start(_str);
+}
+
+
+// Destroy the parser; there is nothing to clean up explicitly.
+Parser::~Parser()
+{
+}
+
+
+// Start a new parse of _str.
+//
+// For a null or empty string the parser is put directly into its
+// end-of-input state (index, character and token all -1). Testing with
+// isEmpty() rather than isNull() matters: an empty but non-null string
+// has no character at index 0 to read.
+//
+// Otherwise the first character is loaded and the first token fetched.
+void
+Parser::start(const QString& _str)
+{
+    m_str = _str;
+
+    if (_str.isEmpty()) {
+        m_index = -1;
+        m_nextChar = -1;
+        m_nextToken = -1;
+        return;
+    }
+
+    m_index = 0;
+    m_nextChar = m_str.at(0).toLatin1();
+    getNextToken();
+}
+
+
+// ----------------------------------------------------------------
+
+
+// Skip whitespace, then read an optionally negative decimal integer
+// from the input.
+//
+// Returns true and stores the value in *_result on success. Returns
+// false, leaving *_result untouched, when no digit follows; a leading
+// '-' has already been consumed in that case.
+
+bool
+Parser::parseInt(int *_result)
+{
+    skipWhitespace();
+
+    const bool negative = (m_nextChar == '-');
+    if (negative)
+        getNextChar();
+
+    if (!isdigit(m_nextChar))
+        return false;
+
+    // At least one digit is present, so accumulate until they run out.
+    int magnitude = 0;
+    do {
+        magnitude = magnitude * 10 + (m_nextChar - '0');
+        getNextChar();
+    } while (isdigit(m_nextChar));
+
+    *_result = negative ? -magnitude : magnitude;
+    return true;
+}
+
+
+// Skip whitespace, then read a simple floating point number of the
+// form -?[0-9]+'.'?[0-9]* from the input.
+//
+// Returns true and stores the value in *_result on success, false
+// when no digit follows the optional sign.
+
+bool
+Parser::parseSimpleFloat(double *_result)
+{
+    skipWhitespace();
+
+    double sign = 1.0;
+    if (m_nextChar == '-') {
+        sign = -1.0;
+        getNextChar();
+    }
+
+    if (!isdigit(m_nextChar))
+        return false;
+
+    // Integer part.
+    double magnitude = 0.0;
+    while (isdigit(m_nextChar)) {
+        magnitude = magnitude * 10.0 + static_cast<double>(m_nextChar - '0');
+        getNextChar();
+    }
+
+    // A '.' is consumed here; a fraction follows only when the
+    // character after it is a digit.
+    const bool hasFraction = (m_nextChar == '.') && isdigit(getNextChar());
+    if (hasFraction) {
+        double placeValue = 0.1;
+        while (isdigit(m_nextChar)) {
+            magnitude += placeValue * static_cast<double>(m_nextChar - '0');
+            placeValue /= 10.0;
+            getNextChar();
+        }
+    }
+
+    *_result = sign * magnitude;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+// protected methods
+
+
+// Advance to the next character of the input and return it.
+//
+// Returns -1, and puts the parser into its end-of-input state, once
+// the end of the string has been reached. The index is checked against
+// the string length before the character is fetched, so the string is
+// never read past its last character.
+int
+Parser::getNextChar()
+{
+    // Already at end of input.
+    if (m_index == -1)
+        return -1;
+
+    ++m_index;
+    if (m_index >= m_str.length()) {
+        // Ran off the end of the string: reset the parser.
+        m_index = -1;
+        m_nextChar = -1;
+        return m_nextChar;
+    }
+
+    m_nextChar = m_str.at(m_index).toLatin1();
+
+    // A 0 character is treated as the end of the string as well.
+    if (m_nextChar == 0) {
+        m_index = -1;
+        m_nextChar = -1;
+    }
+
+    return m_nextChar;
+}
+
+
+// Advance past any whitespace and return the first character that is
+// not whitespace (the current character if it already is not).
+int
+Parser::skipWhitespace()
+{
+    for (;;) {
+        const QChar current(m_nextChar);
+        if (!current.isSpace())
+            break;
+        getNextChar();
+    }
+
+    return m_nextChar;
+}
+
+
+// Get the next token. This corresponds to the lexical analyzer of a
+// standard parser, e.g. as generated by lex.
+//
+// This basic parser supports integers and simple floats; any other
+// character is its own token, and -1 marks the end of the string.
+// Reimplement this method to extend it.
+//
+// A number is first read as an int. If a '.' follows, the input is
+// rewound to the start of the number and read again as a float. The
+// rewind has to restore the current character together with the
+// index; restoring only the index would make the float parser start
+// on the '.' and fail.
+
+int
+Parser::getNextToken()
+{
+    skipWhitespace();
+
+    if (isdigit(nextChar())) {
+        // At this point we know that there is a valid number in the
+        // string. The only question now is whether it is an int or a
+        // float. Remember where it starts in case we must rescan.
+        const int numberStart = m_index;
+
+        parseInt(&m_intVal);
+
+        skipWhitespace();
+        if (nextChar() == '.') {
+            m_index = numberStart;
+            m_nextChar = m_str.at(numberStart).toLatin1();
+
+            // No need to check since we already know it is correct.
+            (void) parseSimpleFloat(&m_floatVal);
+            m_nextToken = FLOAT_TOKEN;
+        }
+        else
+            m_nextToken = INT_TOKEN;
+    }
+    else if (nextChar() != -1) {
+        // Any character.
+        m_nextToken = nextChar();
+        getNextChar();
+    }
+    else {
+        // End of string.
+        m_nextToken = -1;
+    }
+
+    return m_nextToken;
+}
--- /dev/null
+/***************************************************************************
+ copyright : (C) 2005 by Inge Wallin
+ email : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ ***************************************************************************/
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include <qstring.h>
+
+/**
+ * @class Parser
+ * This is a general purpose parser originally written by Inge Wallin.
+ *
+ * It keeps one character and one token of lookahead over a string and
+ * recognizes integers and simple floats; every other character is its
+ * own token.
+ *
+ * It is intended to be subclassed; see MoleculeParser.
+ *
+ * @author Inge Wallin
+ */
+class Parser {
+public:
+    // All characters are their own token value per default.
+    static const int INT_TOKEN = 257;
+    static const int FLOAT_TOKEN = 258;
+    // Extend this list in your subclass to make a more advanced parser.
+
+    Parser();
+    Parser(const QString& _str);
+    virtual ~Parser();
+
+    /**
+     * Start a new parse.
+     */
+    void start(const QString& _str);
+
+    /**
+     * Peek at the next character; -1 at the end of the input.
+     */
+    int nextChar() const { return m_nextChar; }
+
+    /**
+     * Peek at the next token; -1 at the end of the input.
+     */
+    int nextToken() const { return m_nextToken; }
+
+    /**
+     * Get the value stored for different types of tokens.
+     *
+     * floatVal() returns double, the type the value is stored in, so
+     * that no precision is lost on the way out.
+     */
+    int intVal() const { return m_intVal; }
+    double floatVal() const { return m_floatVal; }
+
+private:
+
+    // Try to parse some special datatypes.
+    bool parseInt(int *_result);
+    bool parseSimpleFloat(double *_result);
+
+protected:
+
+    /**
+     * Make the next character the current one.
+     */
+    int getNextChar();
+
+    /**
+     * Make the next non-space character the current one.
+     */
+    int skipWhitespace();
+
+    /**
+     * Fetches the next token.
+     */
+    virtual int getNextToken();
+
+private:
+    QString m_str;      // The string being parsed
+    int m_index;        // Index of m_nextChar in m_str, -1 at the end
+    int m_nextChar;     // Current character, -1 at the end
+
+protected:
+
+    // Lexical analysis and token handling. These members need to be
+    // protected instead of private since we want to be able to
+    // reimplement getNextToken().
+
+    /**
+     * The next token to be used in the parser.
+     */
+    int m_nextToken;
+
+    // Values for the respective token. These could be made into a
+    // union, but I don't think it is necessary to bother, since they
+    // are so few and we don't instantiate a lot of copies of the
+    // parser.
+    int m_intVal;       // Valid if m_nextToken == INT_TOKEN
+    double m_floatVal;  // Valid if m_nextToken == FLOAT_TOKEN
+};
+
+#endif