* Add the classes needed for the moleculeparser.

author Carsten Niehaus <cniehaus@gmx.de>

Fri, 30 Dec 2005 12:27:39 +0000 (12:27 +0000)

committer Carsten Niehaus <cniehaus@gmx.de>

Fri, 30 Dec 2005 12:27:39 +0000 (12:27 +0000)
author Carsten Niehaus <cniehaus@gmx.de>
Fri, 30 Dec 2005 12:27:39 +0000 (12:27 +0000)
committer Carsten Niehaus <cniehaus@gmx.de>
Fri, 30 Dec 2005 12:27:39 +0000 (12:27 +0000)
diff --git a/libscience/moleculeparser.cpp b/libscience/moleculeparser.cpp

new file mode 100644 (file)

index 0000000..7f7845c
--- /dev/null
+++ b/libscience/moleculeparser.cpp
@@ -0,0 +1,304 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "kalziumdataobject.h"
+#include "moleculeparser.h"
+
+
+// ================================================================
+//                    class ElementCountMap
+
+
+
+// Create an empty map.  The list member is default-constructed and then
+// cleared, so the object always starts out without entries.
+ElementCountMap::ElementCountMap()
+    : m_map()
+{
+    m_map.clear();
+}
+
+
+// Destructor.  Performs no explicit work.
+//
+// NOTE(review): the ElementCount objects referenced from m_map are not
+// deleted here -- confirm whether something else is meant to own them.
+ElementCountMap::~ElementCountMap() {}
+
+
+ElementCount *
+ElementCountMap::search(Element *_element)
+{
+       QList<ElementCount *>::ConstIterator       it    = m_map.constBegin();
+       const QList<ElementCount *>::ConstIterator itEnd = m_map.constEnd();
+
+       for (; it != itEnd; ++it) {
+               if ((*it)->element() == _element)
+                       return *it;
+       }
+
+       return 0;
+}
+
+
+// Merge _map into this map: every (element, count) pair stored in _map is
+// handed to add(Element *, int) in list order.
+void
+ElementCountMap::add(ElementCountMap &_map)
+{
+    const QList<ElementCount *>  &entries = _map.m_map;
+
+    for (int i = 0; i < entries.size(); ++i) {
+        const ElementCount  *entry = entries.at(i);
+
+        add(entry->m_element, entry->m_count);
+    }
+}
+
+
+// Add _count occurrences of _element.
+//
+// If the map already holds an entry for _element its count is increased;
+// otherwise a newly allocated ElementCount is appended to the list.
+void
+ElementCountMap::add(Element *_element, int _count)
+{
+    ElementCount  *existing = search(_element);
+
+    if (!existing) {
+        m_map.append(new ElementCount(_element, _count));
+        return;
+    }
+
+    existing->m_count += _count;
+}
+
+
+// Multiply the count of every entry in the map by _factor.
+void
+ElementCountMap::multiply(int _factor)
+{
+    Iterator  entry = begin();
+
+    while (entry != end()) {
+        (*entry)->multiply(_factor);
+        ++entry;
+    }
+}
+
+
+// ================================================================
+//                    class MoleculeParser
+
+
+// Create a parser without an input string.
+//
+// The error flag and the current-element pointer are cleared explicitly so
+// that they never hold indeterminate values before the first parse.
+MoleculeParser::MoleculeParser()
+    : Parser(),
+      m_error(false),
+      m_elementVal(0)
+{
+}
+
+
+// Create a parser and start tokenizing _str.
+//
+// The base class is default-constructed and start() is called from this
+// constructor body instead of passing _str to Parser(const QString&).
+// While the Parser constructor runs, a virtual call to getNextToken()
+// resolves to Parser::getNextToken(), so the first token would be produced
+// without element recognition.  Calling start() here makes the first token
+// go through MoleculeParser::getNextToken().
+//
+// The members are initialized before start() runs because looking up an
+// element may set m_error and always assigns m_elementVal.
+MoleculeParser::MoleculeParser(const QString& _str)
+    : Parser(),
+      m_error(false),
+      m_elementVal(0)
+{
+    start(_str);
+}
+
+
+// Destructor.  Nothing to release here; the Parser destructor is invoked
+// automatically and must not be called by hand.
+MoleculeParser::~MoleculeParser() {}
+
+
+// ----------------------------------------------------------------
+//                            public methods
+
+
+// Try to parse the molecule and get the weight of it.
+//
+// This method also acts as the main loop.
+
+bool
+MoleculeParser::weight(QString         _moleculeString, 
+                                          double          *_resultMass,
+                                          ElementCountMap *_resultMap)
+{
+       // Clear the result variables and set m_error to false
+       _resultMap->clear();
+       m_error = false;
+       *_resultMass = 0.0;
+
+       // Initialize the parsing process, and parse te molecule.
+       start(_moleculeString);
+       parseSubmolecule(_resultMass, _resultMap);
+
+       if (nextToken() != -1)
+               return false;
+
+       if ( m_error )//there was an error in the input...
+               return false;
+
+       return true;
+}
+
+
+// ----------------------------------------------------------------
+//            helper methods for the public methods
+
+
+// Parse a submolecule.  This is a list of terms.
+//
+
+bool
+MoleculeParser::parseSubmolecule(double          *_resultMass,
+                                                                ElementCountMap *_resultMap)
+{
+    double           subMass = 0.0;
+    ElementCountMap  subMap;
+
+    *_resultMass = 0.0;
+       _resultMap->clear();
+    while (parseTerm(&subMass, &subMap)) {
+               //kdDebug() << "Parsed a term, weight = " << subresult << endl;
+
+               // Add the mass and composition of the submolecule to the total.
+               *_resultMass += subMass;
+               _resultMap->add(subMap);
+    }
+
+    return true;
+}
+
+
+// Parse a term within the molecule, i.e. a single atom or a
+// submolecule within parenthesis followed by an optional number.
+// Examples: Bk, Mn2, (COOH)2
+//
+// Return true if correct, otherwise return false.  
+
+// If correct, the mass of the term is returned in *_resultMass, and
+// the flattened composition of the molecule in *_resultMap.
+//
+
+bool
+MoleculeParser::parseTerm(double          *_resultMass,
+                                                 ElementCountMap *_resultMap)
+{
+    *_resultMass = 0.0;
+       _resultMap->clear();
+ 
+#if 0
+    kdDebug() << "parseTerm(): Next token =  "
+                         << nextToken() << endl;
+#endif
+    if (nextToken() == ELEMENT_TOKEN) {
+               //kdDebug() << "Parsed an element: " << m_elementVal->symbol() << endl;
+               *_resultMass = m_elementVal->dataAsVariant( ChemicalDataObject::mass ).toDouble();
+               _resultMap->add(m_elementVal, 1);
+
+               getNextToken();
+    }
+
+    else if (nextToken() == '(') {
+               // A submolecule.
+
+               getNextToken();
+               parseSubmolecule(_resultMass, _resultMap);
+
+               // Must end in a ")".
+               if (nextToken() == ')') {
+                       //kdDebug() << "Parsed a submolecule. weight = " << *_result << endl;
+                       getNextToken();
+               }
+               else
+                       return false;
+    }
+    else 
+               // Neither an element nor a list within ().
+               return false;
+
+    // Optional number.
+    if (nextToken() == INT_TOKEN) {
+               //kdDebug() << "Parsed a number: " << intVal() << endl;
+
+       *_resultMass *= intVal();
+               _resultMap->multiply(intVal());
+
+               getNextToken();
+    }
+
+    kdDebug() << "Weight of term = " << *_resultMass << endl;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+//                           protected methods
+
+
+// Extend Parser::getNextToken with elements.
+
+int
+MoleculeParser::getNextToken()
+{
+    QString  elementName;
+
+#if 0
+    kdDebug() << "getNextToken(): Next character = "
+             << nextChar() << endl;
+#endif
+
+    // Check if the token is an element name.
+    if ('A' <= nextChar() && nextChar() <= 'Z') {
+       elementName = char(nextChar());
+       getNextChar();
+
+       if ('a' <= nextChar() && nextChar() <= 'z') {
+           elementName.append(char(nextChar()));
+           getNextChar();
+       }
+
+               // Look up the element from the name..
+       m_elementVal = lookupElement(elementName);
+       if (m_elementVal)
+       {
+           m_nextToken = ELEMENT_TOKEN;
+       }
+       else
+           m_nextToken = -1;
+    }
+    else
+       return Parser::getNextToken();
+
+    return m_nextToken;
+}
+
+
+// ----------------------------------------------------------------
+//                          private methods
+
+
+Element *
+MoleculeParser::lookupElement( const QString& _name )
+{
+    QList<Element*> elementList = KalziumDataObject::instance()->ElementList;
+
+    //kdDebug() << "looking up " << _name << endl;
+
+    QList<Element*>::ConstIterator        it  = elementList.constBegin();
+    const QList<Element*>::ConstIterator  end = elementList.constEnd();
+
+       for (; it != end; ++it) {
+               if ( (*it)->dataAsVariant(ChemicalDataObject::symbol) == _name ) {
+                       kdDebug() << "Found element " << _name << endl;
+                       return *it;
+               }
+       }
+
+       //if there is an error make m_error true.
+       m_error = true;
+
+       kdDebug() << k_funcinfo << "no such element, parsing error!: " << _name << endl;
+       return NULL;
+}
diff --git a/libscience/moleculeparser.h b/libscience/moleculeparser.h

new file mode 100644 (file)

index 0000000..2948dca
--- /dev/null
+++ b/libscience/moleculeparser.h
@@ -0,0 +1,139 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MOLECULEPARSER_H
+#define MOLECULEPARSER_H
+
+#include "element.h"
+#include "parser.h"
+
+#include <qmap.h>
+#include <qlist.h>
+
+
+/**
+ * @class ElementCountMap
+ */
+class ElementCount {
+ public:
+    ElementCount(Element *_element, int _count)
+    {
+           m_element = _element;
+           m_count   = _count;
+       }
+    ElementCount(Element *_element)
+        {
+           m_element = _element;
+           m_count   = 0;
+       }
+       
+    ~ElementCount();
+
+    Element *element() const    { return m_element;   }
+    int   count()  const        { return m_count;     }
+    void  add(int _count)       { m_count += _count;  }
+       void  multiply(int _factor) { m_count *= _factor; }
+
+    Element  *m_element;
+    int       m_count;
+};
+
+
+/**
+ * @class ElementCount
+ */
+class ElementCountMap {
+ public:
+    ElementCountMap();
+    ~ElementCountMap();
+
+    void  clear()          { m_map.clear(); }
+
+    ElementCount  *search(Element *_element);
+    void           add(ElementCountMap &_map);
+    void           add(Element *_element, int _count);
+       void           multiply(int _factor);
+
+       typedef QList<ElementCount*>::Iterator  Iterator;
+       Iterator   begin() { return  m_map.begin(); }
+       Iterator   end()   { return  m_map.end();   }
+
+ private:
+    QList<ElementCount*>  m_map;
+};
+
+
+
+/**
+ * @class MoleculeParser
+ *
+ * Parse molecule formulas.
+ *
+ * Usage:
+ * @code
+ *   MoleculeParser  parser;
+ *   QString         chemical_formula = "C2H5OH";
+ *   double          weight;
+ *
+ *   if (parser.weight(chemical_formula, &weight))
+ *     cout << "Weight of " << chemical_formula << " = " << weight << ".\n";
+ *   else
+ *     cout << "Parse error\n";
+ * @endcode
+ *
+ * @author Inge Wallin
+ */
+class MoleculeParser : public Parser {
+
+public:
+    static const int  ELEMENT_TOKEN = 300;
+
+    MoleculeParser();
+    MoleculeParser( const QString& _str);
+    virtual ~MoleculeParser();
+
+    /**
+     * Try to parse the molecule @p molecule and get the weight of it.
+     * The calculated weight is stored in @p _result.
+     *
+     * @return whether the parsing was successful or not
+     */
+    bool  weight(QString         _moleculeString,
+                                double          *_resultMass,
+                                ElementCountMap *_resultMap);
+ private:
+    // Helper functions
+    bool      parseSubmolecule(double          *_resultMass,
+                                                          ElementCountMap *_resultMap);
+    bool      parseTerm(double          *_resultMass, 
+                                               ElementCountMap *_resultMap);
+
+    Element  *lookupElement( const QString& _name );
+
+    QMap<Element*, int> m_elementMap;
+               
+       //if this booloean is "true" the parser found an error
+       bool             m_error;
+
+protected:
+
+    /**
+     * Extends the standard tokenizer in Parser::getNextToken().
+     */
+    virtual int  getNextToken();
+
+private:
+    Element  *m_elementVal;    // Valid if m_nextToken == ELEMENT_TOKEN
+};
+
+#endif
diff --git a/libscience/parser.cpp b/libscience/parser.cpp

new file mode 100644 (file)

index 0000000..38b4683
--- /dev/null
+++ b/libscience/parser.cpp
@@ -0,0 +1,217 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "parser.h"
+
+
+// Create a parser without an input string.
+//
+// The token values are zero-initialized so that intVal() and floatVal()
+// return a defined value even before a number token has been read.
+Parser::Parser()
+    : m_intVal(0),
+      m_floatVal(0.0)
+{
+    start(QString::null);
+}
+
+
+// Create a parser and start parsing _str.
+//
+// The token values are zero-initialized so that intVal() and floatVal()
+// return a defined value even before a number token has been read.
+//
+// start() fetches the first token.  Because that happens while the Parser
+// constructor is running, the call to getNextToken() resolves to
+// Parser::getNextToken() even when a subclass reimplements it.
+Parser::Parser(const QString& _str)
+    : m_intVal(0),
+      m_floatVal(0.0)
+{
+    start(_str);
+}
+
+
+// Destructor.  No explicit cleanup is performed.
+Parser::~Parser() {}
+
+
+// Start a new parse of _str.
+//
+// For a string without characters (null or merely empty) the parser is put
+// into its end-of-input state: index, next character and next token are
+// all -1.  An empty string has to be caught here as well as a null one,
+// since it has no character at position 0 to read.
+//
+// Otherwise the first character is loaded and the first token is fetched.
+void
+Parser::start(const QString& _str)
+{
+    m_str = _str;
+
+    if (m_str.isEmpty()) {
+        m_index     = -1;
+        m_nextChar  = -1;
+        m_nextToken = -1;
+        return;
+    }
+
+    m_index    = 0;
+    m_nextChar = m_str.at(0).toLatin1();
+    getNextToken();
+}
+
+
+// ----------------------------------------------------------------
+
+
+// Skip whitespace, and try to parse the following characters as an int.
+//
+// Return true if successful.  
+
+bool
+Parser::parseInt(int *_result)
+{
+    int  sign = 1;
+
+    skipWhitespace();
+
+    if (m_nextChar == '-') {
+       sign = -1;
+       getNextChar();
+    }
+
+    if (!isdigit(m_nextChar))
+       return false;
+
+    int  result = 0;
+    while (isdigit(m_nextChar)) {
+       result = result * 10 + (m_nextChar - '0');
+       getNextChar();
+    }
+
+    *_result = sign * result;
+    return true;
+}
+
+
+// Skip whitespace, and try to parse the following characters as a
+// simple float of the type -?[0-9]+'.'?[0-9]*
+//
+// Return true if successful.  
+
+bool
+Parser::parseSimpleFloat(double *_result)
+{
+    double  sign = 1.0;
+
+    skipWhitespace();
+    if (m_nextChar == '-') {
+       sign = -1.0;
+       getNextChar();
+    }
+
+    if (!isdigit(m_nextChar))
+       return false;
+
+    double  result = 0.0;
+
+    // The integer.
+    while (isdigit(m_nextChar)) {
+       result = result * 10.0 + (double) (m_nextChar - '0');
+       getNextChar();
+    }
+    *_result = result;
+
+    if (m_nextChar != '.' || !isdigit(getNextChar())) {
+       *_result = sign * result;
+       return true;
+    }
+       
+    double  decimal = 0.1;
+    while (isdigit(m_nextChar)) {
+       result += decimal * (double) (m_nextChar - '0');
+       decimal /= 10.0;
+       getNextChar();
+    }
+
+    *_result = sign * result;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+//                           protected methods
+
+
+int
+Parser::getNextChar()
+{
+    if (m_index == -1)
+       return -1;
+
+    // If end of string, then reset the parser.
+    if (m_index == (int) m_str.length()) {
+       m_index    = -1;
+       m_nextChar = -1;
+    }
+    else 
+       m_nextChar = m_str.at(++m_index).toLatin1();
+
+    // Take care of null-terminated strings.
+    if (m_nextChar == 0) {
+       m_index    = -1;
+       m_nextChar = -1;
+    }
+
+    //kdDebug() << "Parser::getNextChar(): char = " << m_nextChar << endl;
+
+    return m_nextChar;
+}
+
+
+// Advance past any whitespace and return the character the parser is then
+// positioned at (which may be -1 at the end of the string).
+int
+Parser::skipWhitespace()
+{
+    for (;;) {
+        if (!QChar(m_nextChar).isSpace())
+            break;
+        getNextChar();
+    }
+
+    return m_nextChar;
+}
+
+
+// Get the next token.  This corresponds to the lexical analyzer of a
+// standard parser, e.g as generated by lex.
+//
+// This basic parser supports integers and simple
+// floats.  Reimplement this method to extend it.
+
+int
+Parser::getNextToken()
+{
+    int  saveIndex = m_index;
+
+    skipWhitespace();
+    if (isdigit(nextChar())) {
+       // At this point we know that there is a valid number in the
+       // string.  The only question now, is whether it is an int or a
+       // float.
+
+       parseInt(&m_intVal);
+
+       skipWhitespace();
+       if (nextChar() == '.') {
+           m_index = saveIndex;
+
+           // No need to check since we already know it is correct.
+           (void) parseSimpleFloat(&m_floatVal);
+           m_nextToken = FLOAT_TOKEN;
+       }
+       else
+           m_nextToken = INT_TOKEN;
+    }
+
+    else if (nextChar() != -1) {
+       // Any character.
+       m_nextToken = nextChar();
+       getNextChar();
+    }
+
+    else
+       // End of string.
+       m_nextToken = -1;
+
+    return m_nextToken;
+}
diff --git a/libscience/parser.h b/libscience/parser.h

new file mode 100644 (file)

index 0000000..37355f8
--- /dev/null
+++ b/libscience/parser.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include <qstring.h>
+
+/**
+ * @class Parser
+ * This is a general purpose parser originally written by Inge Wallin.
+ *
+ * It is intended to be subclassed; see MoleculeParser.
+ *
+ * @author Inge Wallin
+ */
+class Parser {
+public:
+    // All characters are their own token value per default.
+    static const  int  INT_TOKEN   = 257;
+    static const  int  FLOAT_TOKEN = 258;
+    // Extend this list in your subclass to make a more advanced parser.
+
+    Parser();
+    Parser(const QString& _str);
+    virtual ~Parser();
+
+    /**
+     * Start a new parse.
+     */
+    void   start(const QString& _str);
+
+    /**
+     * Peek at the next character;
+     */
+    int    nextChar()  const { return m_nextChar;  }
+
+    /**
+     * Peek at the next token.
+     */
+    int    nextToken() const { return m_nextToken; }
+
+    /**
+     * Get the value stored for different types of tokens.
+     */
+    int    intVal()    const { return m_intVal;    }
+    float  floatVal()  const { return m_floatVal;  }
+
+private:
+
+    // Try to parse some special datatypes.
+    bool  parseInt(int *_result);
+    bool  parseSimpleFloat(double *_result);
+
+protected:
+
+    /**
+     * Make the next character the current one.
+     */
+    int   getNextChar();
+
+    /**
+     * Make the next non-space character the current one.
+     */
+    int   skipWhitespace();
+
+    /**
+     * Fetches the next token.
+     */
+    virtual int   getNextToken();
+
+private:
+    QString  m_str;
+    int      m_index;
+    int      m_nextChar;
+
+ protected:
+
+    // Lexical analysis and token handling.  These members need to be
+    // protected instead of private since we want to be able to
+    // reimplement getNextToken().
+
+    /**
+     * The next token to be used in the parser.
+     */
+    int      m_nextToken;
+
+    // Values for the respective token.  These could be made into a
+    // union, but I don't think it is necessary to bother, since they
+    // are so few and we don't instantiate a lot of copies of the
+    // parser.
+    int      m_intVal;         // Valid if m_nextToken == INT_TOKEN
+    double   m_floatVal;       // Valid if m_nextToken == FLOAT_TOKEN
+};
+
+#endif
author	Carsten Niehaus <cniehaus@gmx.de>
	Fri, 30 Dec 2005 12:27:39 +0000 (12:27 +0000)
committer	Carsten Niehaus <cniehaus@gmx.de>
	Fri, 30 Dec 2005 12:27:39 +0000 (12:27 +0000)
libscience/moleculeparser.cpp	[new file with mode: 0644]	patch \| blob
libscience/moleculeparser.h	[new file with mode: 0644]	patch \| blob
libscience/parser.cpp	[new file with mode: 0644]	patch \| blob
libscience/parser.h	[new file with mode: 0644]	patch \| blob