From: Carsten Niehaus <cniehaus@gmx.de>
Date: Fri, 30 Dec 2005 12:27:39 +0000 (+0000)
Subject: * Add the classes needed for the moleculeparser.
X-Git-Tag: v3.80.2~203
X-Git-Url: https://git.rmz.fi/?a=commitdiff_plain;h=b2860812339ce07169800d916c44bfd527133a96;p=libqmvoc.git

* Add the classes needed for the moleculeparser.
* Not yet in the Makefile.am as qt-copy still compiles here
  and I cannot check if it compiles&links
* Added some /**@class foo*/ comments to enable doxygen-generation
  Need to wait until EBN is syncing to check what is missing

svn path=/trunk/KDE/kdeedu/libkdeedu/; revision=492590
---

diff --git a/libscience/moleculeparser.cpp b/libscience/moleculeparser.cpp
new file mode 100644
index 0000000..7f7845c
--- /dev/null
+++ b/libscience/moleculeparser.cpp
@@ -0,0 +1,304 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "kalziumdataobject.h"
+#include "moleculeparser.h"
+
+
+// ================================================================
+//                    class ElementCountMap
+
+
+
+// Create an empty element/count map.
+ElementCountMap::ElementCountMap()
+	: m_map()
+{
+}
+
+ElementCountMap::~ElementCountMap()
+{
+	// NOTE(review): entries appended by add() are allocated with new and are not deleted here.
+}
+
+// Find the entry that counts _element.
+// Returns the matching ElementCount, or 0 when _element has no entry yet.
+ElementCount *
+ElementCountMap::search(Element *_element)
+{
+	for (int i = 0; i < m_map.size(); ++i) {
+		ElementCount *entry = m_map.at(i);
+		if (entry->element() == _element)
+			return entry;
+	}
+
+	return 0;
+}
+
+
+// Merge _map into this map: every element count in _map is added to the
+// matching entry here, creating the entry when it does not exist yet.
+void
+ElementCountMap::add(ElementCountMap &_map)
+{
+	const QList<ElementCount *> &entries = _map.m_map;
+
+	for (int i = 0; i < entries.size(); ++i) {
+		const ElementCount *entry = entries.at(i);
+		add(entry->m_element, entry->m_count);
+	}
+}
+
+
+// Add _count atoms of _element, creating a new entry if none exists yet.
+void
+ElementCountMap::add(Element *_element, int _count)
+{
+	ElementCount *existing = search(_element);
+	if (!existing) {
+		m_map.append(new ElementCount(_element, _count));
+		return;
+	}
+	existing->m_count += _count;
+}
+
+
+// Multiply the count of every element in the map by _factor.
+void
+ElementCountMap::multiply(int _factor)
+{
+	for (int i = 0; i < m_map.size(); ++i) {
+		ElementCount *entry = m_map.at(i);
+		entry->multiply(_factor);
+	}
+}
+
+
+// ================================================================
+//                    class MoleculeParser
+
+
+// Create a parser without input; the error flag and element value start out cleared.
+MoleculeParser::MoleculeParser()
+    : Parser(), m_error(false), m_elementVal(0)
+{
+}
+
+MoleculeParser::MoleculeParser(const QString& _str)
+    : Parser(), m_error(false), m_elementVal(0)
+{
+    // Tokenize here, not in Parser(_str): a base constructor cannot call our getNextToken().
+    start(_str);
+}
+MoleculeParser::~MoleculeParser()
+{
+    // Nothing to do here; no explicit call to the base-class destructor is needed.
+}
+
+
+// ----------------------------------------------------------------
+//                            public methods
+
+
+// Compute the weight and the element composition of a molecule.
+//
+// Resets *_resultMass, *_resultMap and the error flag, parses
+// _moleculeString, and returns true only when the whole string was
+// consumed and no unknown element was encountered.
+
+bool
+MoleculeParser::weight(QString         _moleculeString,
+					   double          *_resultMass,
+					   ElementCountMap *_resultMap)
+{
+	// Start from a clean slate.
+	m_error      = false;
+	*_resultMass = 0.0;
+	_resultMap->clear();
+
+	// Tokenize the formula and parse it as one (sub)molecule.
+	start(_moleculeString);
+	parseSubmolecule(_resultMass, _resultMap);
+
+	// Success requires that all input was consumed and that no
+	// error was flagged along the way.
+	const bool atEnd = (nextToken() == -1);
+
+	return atEnd && !m_error;
+}
+
+
+// ----------------------------------------------------------------
+//            helper methods for the public methods
+
+
+// Parse a submolecule, i.e. a sequence of zero or more terms.
+//
+// The masses and compositions of the terms are summed up into
+// *_resultMass and *_resultMap.  Always returns true.
+bool
+MoleculeParser::parseSubmolecule(double          *_resultMass,
+								 ElementCountMap *_resultMap)
+{
+	double           termMass = 0.0;
+	ElementCountMap  termMap;
+
+	_resultMap->clear();
+	*_resultMass = 0.0;
+	// Accumulate terms until parseTerm() cannot parse another one.
+	for (;;) {
+		if (!parseTerm(&termMass, &termMap))
+			break;
+		*_resultMass += termMass;
+		_resultMap->add(termMap);
+	}
+	return true;
+}
+
+
+// Parse a term within the molecule, i.e. a single atom or a
+// submolecule within parentheses, followed by an optional number.
+// Examples: Bk, Mn2, (COOH)2
+//
+// Return true if a term was parsed, otherwise return false.
+//
+// If correct, the mass of the term is returned in *_resultMass, and
+// the flattened composition of the term in *_resultMap.  A '(' that
+// is not matched by a ')' additionally sets m_error, so that weight()
+// rejects the formula instead of silently dropping the open group.
+
+bool
+MoleculeParser::parseTerm(double          *_resultMass,
+						  ElementCountMap *_resultMap)
+{
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+    if (nextToken() == ELEMENT_TOKEN) {
+        // A single atom; m_elementVal was set by getNextToken().
+        *_resultMass = m_elementVal->dataAsVariant( ChemicalDataObject::mass ).toDouble();
+        _resultMap->add(m_elementVal, 1);
+
+        getNextToken();
+    }
+    else if (nextToken() == '(') {
+        // A submolecule within parentheses.
+        getNextToken();
+        parseSubmolecule(_resultMass, _resultMap);
+
+        // Must end in a ")".
+        if (nextToken() != ')') {
+            // Unbalanced parenthesis.  Without flagging the error,
+            // input such as "(H" would reach the end of the string
+            // and be reported as a valid formula.
+            m_error = true;
+            return false;
+        }
+
+        getNextToken();
+    }
+    else {
+        // Neither an element nor a list within ().
+        return false;
+    }
+
+    // Optional number: the term occurs that many times, so both the
+    // mass and the element counts are scaled by it.
+    if (nextToken() == INT_TOKEN) {
+        const int  factor = intVal();
+
+        *_resultMass *= factor;
+        _resultMap->multiply(factor);
+
+        getNextToken();
+    }
+
+    kdDebug() << "Weight of term = " << *_resultMass << endl;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+//                           protected methods
+
+
+// Tokenizer extension: recognize element symbols (an upper-case letter,
+// optionally followed by one lower-case letter) as ELEMENT_TOKEN and
+// store the matching element in m_elementVal.
+//
+// Everything else is delegated to Parser::getNextToken().
+
+int
+MoleculeParser::getNextToken()
+{
+    // Not the start of an element symbol: let the base class handle it.
+    if (nextChar() < 'A' || nextChar() > 'Z')
+        return Parser::getNextToken();
+
+    // First, mandatory, upper-case letter of the symbol.  The character
+    // is known to be in 'A'..'Z' here.
+    QString  symbol;
+    symbol = char(nextChar());
+    getNextChar();
+
+    // Second, optional, lower-case letter of the symbol.
+    const bool  hasSecondLetter = ('a' <= nextChar() && nextChar() <= 'z');
+    if (hasSecondLetter) {
+        symbol.append(char(nextChar()));
+        getNextChar();
+    }
+
+    // Look up the element.  An unknown symbol yields the token -1;
+    // lookupElement() has then also set m_error.
+    m_elementVal = lookupElement(symbol);
+
+    m_nextToken = -1;
+    if (m_elementVal)
+        m_nextToken = ELEMENT_TOKEN;
+
+    return m_nextToken;
+}
+
+
+// ----------------------------------------------------------------
+//                          private methods
+
+
+// Find the element whose symbol equals _name in the element list of
+// KalziumDataObject.
+//
+// Returns the element, or NULL if there is none; in the latter case
+// m_error is set as well.
+Element *
+MoleculeParser::lookupElement( const QString& _name )
+{
+    const QList<Element*> elements = KalziumDataObject::instance()->ElementList;
+
+    for (int i = 0; i < elements.size(); ++i) {
+        Element *candidate = elements.at(i);
+        if ( candidate->dataAsVariant(ChemicalDataObject::symbol) == _name ) {
+            kdDebug() << "Found element " << _name << endl;
+            return candidate;
+        }
+    }
+
+    // No element carries this symbol: flag the parse error.
+    m_error = true;
+    kdDebug() << k_funcinfo << "no such element, parsing error!: " << _name << endl;
+    return NULL;
+}
diff --git a/libscience/moleculeparser.h b/libscience/moleculeparser.h
new file mode 100644
index 0000000..2948dca
--- /dev/null
+++ b/libscience/moleculeparser.h
@@ -0,0 +1,139 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MOLECULEPARSER_H
+#define MOLECULEPARSER_H
+
+#include "element.h"
+#include "parser.h"
+
+#include <qmap.h>
+#include <qlist.h>
+
+
+/**
+ * @class ElementCount
+ *
+ * Pairs an element with the number of times it occurs in a molecule.
+ * The element is only referenced; it is not deleted by this class.
+ */
+class ElementCount {
+ public:
+    /** Create a count of @p _count for @p _element. */
+    ElementCount(Element *_element, int _count)
+        : m_element(_element), m_count(_count) {}
+    /** Create a count of zero for @p _element. */
+    ElementCount(Element *_element)
+        : m_element(_element), m_count(0) {}
+
+    // Nothing to release; defined inline so that objects can be destroyed.
+    ~ElementCount() {}
+
+    Element *element() const    { return m_element;   }
+    int   count()  const        { return m_count;     }
+    void  add(int _count)       { m_count += _count;  }
+    void  multiply(int _factor) { m_count *= _factor; }
+
+    Element  *m_element;   // the counted element
+    int       m_count;     // how many times it occurs
+};
+
+
+/**
+ * @class ElementCountMap
+ * A list of ElementCount entries, one per element of a molecule.
+ */
+class ElementCountMap {
+ public:
+    ElementCountMap();
+    ~ElementCountMap();
+
+    // Remove all entries.  NOTE(review): the entries are not deleted here.
+    void  clear()          { m_map.clear(); }
+
+    ElementCount  *search(Element *_element);
+    void           add(ElementCountMap &_map);
+    void           add(Element *_element, int _count);
+	void           multiply(int _factor);
+
+	typedef QList<ElementCount*>::Iterator  Iterator;
+	Iterator   begin() { return  m_map.begin(); }
+	Iterator   end()   { return  m_map.end();   }
+
+ private:
+    QList<ElementCount*>  m_map;
+};
+
+/**
+ * @class MoleculeParser
+ *
+ * Parse molecule formulas.
+ *
+ * Usage:
+ * @code
+ *   MoleculeParser  parser;
+ *   QString         chemical_formula = "C2H5OH";
+ *   double          weight;
+ *   ElementCountMap composition;
+ *   if (parser.weight(chemical_formula, &weight, &composition))
+ *     cout << "Weight of " << chemical_formula << " = " << weight << ".\n";
+ *   else
+ *     cout << "Parse error\n";
+ * @endcode
+ *
+ * @author Inge Wallin
+ */
+class MoleculeParser : public Parser {
+
+public:
+    static const int  ELEMENT_TOKEN = 300;
+
+    MoleculeParser();
+    MoleculeParser( const QString& _str);
+    virtual ~MoleculeParser();
+
+    /**
+     * Try to parse the molecule @p _moleculeString and get the weight of it.
+     * The weight is stored in @p _resultMass, the composition in @p _resultMap.
+     *
+     * @return whether the parsing was successful or not
+     */
+    bool  weight(QString         _moleculeString,
+				 double          *_resultMass,
+				 ElementCountMap *_resultMap);
+ private:
+    // Helper functions
+    bool      parseSubmolecule(double          *_resultMass,
+							   ElementCountMap *_resultMap);
+    bool      parseTerm(double          *_resultMass, 
+						ElementCountMap *_resultMap);
+
+    Element  *lookupElement( const QString& _name );
+
+    QMap<Element*, int> m_elementMap;
+		
+	// If this boolean is "true", the parser found an error.
+	bool             m_error;
+
+protected:
+
+    /**
+     * Extends the standard tokenizer in Parser::getNextToken().
+     */
+    virtual int  getNextToken();
+
+private:
+    Element  *m_elementVal;	// Valid if m_nextToken == ELEMENT_TOKEN
+};
+
+#endif
diff --git a/libscience/parser.cpp b/libscience/parser.cpp
new file mode 100644
index 0000000..38b4683
--- /dev/null
+++ b/libscience/parser.cpp
@@ -0,0 +1,217 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "parser.h"
+
+
+Parser::Parser()
+    : m_intVal(0), m_floatVal(0.0)
+{
+    start(QString::null);   // no input yet: nextChar() and nextToken() report -1
+}
+
+Parser::Parser(const QString& _str)
+    : m_intVal(0), m_floatVal(0.0)
+{
+    start(_str);   // NB: during construction this runs Parser::getNextToken(), not an override
+}
+
+Parser::~Parser()
+{
+    // Nothing to release.
+}
+
+// Start a new parse of _str and fetch its first token.
+void
+Parser::start(const QString& _str)
+{
+    m_str = _str;
+    // isEmpty(), not isNull(): an empty string has no character 0 either.
+    if (_str.isEmpty()) {
+        m_index     = -1;
+        m_nextChar  = -1;
+        m_nextToken = -1;
+        return;
+    }
+    // Via unsigned char, so that Latin-1 characters above 127 stay positive.
+    m_index    = 0;
+    m_nextChar = static_cast<unsigned char>(m_str.at(0).toLatin1());
+    getNextToken();
+}
+
+// ----------------------------------------------------------------
+
+
+// Parse an optionally negative decimal integer, skipping any leading
+// whitespace.
+//
+// On success the value is stored in *_result and true is returned; if
+// no digit is found, false is returned and *_result is left untouched.
+
+bool
+Parser::parseInt(int *_result)
+{
+    skipWhitespace();
+    const bool  negative = (m_nextChar == '-');
+    if (negative)
+        getNextChar();
+
+    // At least one digit is required.
+    if (!isdigit(m_nextChar))
+        return false;
+
+    // Accumulate the digits, most significant first.
+    int  value = 0;
+    do {
+        value = value * 10 + (m_nextChar - '0');
+        getNextChar();
+    } while (isdigit(m_nextChar));
+
+    *_result = negative ? -value : value;
+    return true;
+}
+
+
+// Parse a simple float of the form -?[0-9]+'.'?[0-9]*, skipping any
+// leading whitespace.
+//
+// On success the value is stored in *_result and true is returned; if
+// no digit is found, false is returned and *_result is left untouched.
+
+bool
+Parser::parseSimpleFloat(double *_result)
+{
+    skipWhitespace();
+
+    // Optional sign.
+    double  sign = 1.0;
+    if (m_nextChar == '-') {
+        sign = -1.0;
+        getNextChar();
+    }
+
+    // At least one digit is required.
+    if (!isdigit(m_nextChar))
+        return false;
+
+    // The integer part.
+    double  value = 0.0;
+    do {
+        value = value * 10.0 + (double) (m_nextChar - '0');
+        getNextChar();
+    } while (isdigit(m_nextChar));
+
+    // The fraction, present only if a '.' is directly followed by a
+    // digit.  Note that a '.' is consumed even if no digit follows.
+    if (m_nextChar == '.' && isdigit(getNextChar())) {
+        double  placeValue = 0.1;
+        do {
+            value += placeValue * (double) (m_nextChar - '0');
+            placeValue /= 10.0;
+            getNextChar();
+        } while (isdigit(m_nextChar));
+    }
+
+    *_result = sign * value;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+//                           protected methods
+
+
+// Advance to the next character and return it, or -1 at the end of the string.
+int
+Parser::getNextChar()
+{
+    if (m_index == -1)
+        return -1;
+    // Check the bound before indexing: QString::at() must not be called
+    // with the string length as position.
+    ++m_index;
+    if (m_index >= m_str.length()) {
+        m_index    = -1;
+        m_nextChar = -1;
+        return m_nextChar;
+    }
+
+    // Via unsigned char, so that Latin-1 characters above 127 stay positive.
+    m_nextChar = static_cast<unsigned char>(m_str.at(m_index).toLatin1());
+
+    // A null character ends the input as well.
+    if (m_nextChar == 0) {
+        m_index    = -1;
+        m_nextChar = -1;
+    }
+    return m_nextChar;
+}
+
+// Skip whitespace and return the first character after it.
+int
+Parser::skipWhitespace()
+{
+    for (;;) {
+        if (!QChar(m_nextChar).isSpace())
+            return m_nextChar;
+        getNextChar();
+    }
+}
+// Get the next token.  This corresponds to the lexical analyzer of a
+// standard parser, e.g as generated by lex.
+//
+// This basic parser supports integers and simple floats; any other
+// character is its own token.  Reimplement this method to extend it.
+
+int
+Parser::getNextToken()
+{
+    // Remember the position (index *and* current character), so that
+    // a number can be re-read as a float.
+    const int  saveIndex = m_index;
+    const int  saveChar  = m_nextChar;
+
+    skipWhitespace();
+    if (isdigit(nextChar())) {
+        // At this point we know that there is a valid number in the
+        // string.  The only question now, is whether it is an int or a
+        // float.
+        parseInt(&m_intVal);
+        skipWhitespace();
+        if (nextChar() == '.') {
+            // Rewind and parse the same characters again as a float.
+            m_index    = saveIndex;
+            m_nextChar = saveChar;
+            // No need to check since we already know it is correct.
+            (void) parseSimpleFloat(&m_floatVal);
+            m_nextToken = FLOAT_TOKEN;
+        }
+        else
+            m_nextToken = INT_TOKEN;
+    }
+    else if (nextChar() != -1) {
+        // Any character.
+        m_nextToken = nextChar();
+        getNextChar();
+    }
+    else
+        // End of string.
+        m_nextToken = -1;
+
+    return m_nextToken;
+}
diff --git a/libscience/parser.h b/libscience/parser.h
new file mode 100644
index 0000000..37355f8
--- /dev/null
+++ b/libscience/parser.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include <qstring.h>
+
+/**
+ * @class Parser
+ * This is a general purpose parser originally written by Inge Wallin.
+ *
+ * It is intended to be subclassed; see MoleculeParser.
+ *
+ * @author Inge Wallin
+ */
+class Parser {
+public:
+    // All characters are their own token value per default.
+    static const  int  INT_TOKEN   = 257;
+    static const  int  FLOAT_TOKEN = 258;
+    // Extend this list in your subclass to make a more advanced parser.
+
+    Parser();
+    Parser(const QString& _str);
+    virtual ~Parser();
+
+    /**
+     * Start a new parse of @p _str.
+     */
+    void   start(const QString& _str);
+
+    /**
+     * Peek at the next character (-1 at the end of the string).
+     */
+    int    nextChar()  const { return m_nextChar;  }
+
+    /**
+     * Peek at the next token (-1 at the end of the string).
+     */
+    int    nextToken() const { return m_nextToken; }
+
+    /**
+     * Get the value stored for different types of tokens.
+     */
+    int    intVal()    const { return m_intVal;    }
+    double floatVal()  const { return m_floatVal;  }
+
+private:
+
+    // Try to parse some special datatypes.
+    bool  parseInt(int *_result);
+    bool  parseSimpleFloat(double *_result);
+
+protected:
+
+    /**
+     * Make the next character the current one.
+     */
+    int   getNextChar();
+
+    /**
+     * Make the next non-space character the current one.
+     */
+    int   skipWhitespace();
+
+    /**
+     * Fetches the next token.
+     */
+    virtual int   getNextToken();
+
+private:
+    QString  m_str;		// The string being parsed
+    int      m_index;		// Position in m_str, or -1 at the end
+    int      m_nextChar;	// Character at m_index, or -1 at the end
+
+ protected:
+
+    // Lexical analysis and token handling.  These members need to be
+    // protected instead of private since we want to be able to
+    // reimplement getNextToken().
+
+    /**
+     * The next token to be used in the parser.
+     */
+    int      m_nextToken;
+
+    // Values for the respective token.  These could be made into a
+    // union, but I don't think it is necessary to bother, since they
+    // are so few and we don't instantiate a lot of copies of the
+    // parser.
+    int      m_intVal;		// Valid if m_nextToken == INT_TOKEN
+    double   m_floatVal;	// Valid if m_nextToken == FLOAT_TOKEN
+};
+
+#endif