From: Carsten Niehaus Date: Fri, 30 Dec 2005 12:27:39 +0000 (+0000) Subject: * Add the classes needed for the moleculeparser. X-Git-Tag: v3.80.2~203 X-Git-Url: https://git.rmz.fi/?a=commitdiff_plain;h=b2860812339ce07169800d916c44bfd527133a96;p=libqmvoc.git * Add the classes needed for the moleculeparser. * Not yet in the Makefile.am as qt-copy still compiles here and I cannot check if it compiles&links * Added some /**@class foo*/ comments to enable doxygen-generation Need to wait until EBN is syncing to check what is missing svn path=/trunk/KDE/kdeedu/libkdeedu/; revision=492590 --- diff --git a/libscience/moleculeparser.cpp b/libscience/moleculeparser.cpp new file mode 100644 index 0000000..7f7845c --- /dev/null +++ b/libscience/moleculeparser.cpp @@ -0,0 +1,304 @@ +/*************************************************************************** + copyright : (C) 2005 by Inge Wallin + email : inge@lysator.liu.se + ***************************************************************************/ +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
*
+ *                                                                         *
+ ***************************************************************************/
+
+
+// NOTE(review): the header names inside <...> on the next two #include lines
+// were stripped when this patch was extracted.  <kdebug.h> is required by the
+// kdDebug()/k_funcinfo calls in this file; <ctype.h> is an assumption --
+// TODO confirm against the repository.
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "kalziumdataobject.h"
+#include "moleculeparser.h"
+
+
+// ================================================================
+//                    class ElementCountMap
+
+// NOTE(review): the template arguments of QList were also stripped by the
+// extraction.  QList<ElementCount*> is restored below from the way the list
+// is used here: add() appends "new ElementCount(...)" and search() returns
+// "*it" as an ElementCount pointer.
+
+
+// Create an empty map.  The list member starts out empty, so there is
+// nothing to clear here.
+ElementCountMap::ElementCountMap()
+{
+}
+
+
+// NOTE(review): the ElementCount objects allocated in add() are not deleted
+// here, and clear() only empties the list of pointers, so the entries appear
+// to be leaked.  Left unchanged because nothing in this file establishes who
+// may still hold those pointers -- confirm ownership before adding a delete.
+ElementCountMap::~ElementCountMap()
+{
+}
+
+
+// Return the entry whose element pointer equals _element, or 0 if the map
+// holds no entry for it.
+ElementCount *
+ElementCountMap::search(Element *_element)
+{
+    QList<ElementCount*>::ConstIterator        it    = m_map.constBegin();
+    const QList<ElementCount*>::ConstIterator  itEnd = m_map.constEnd();
+
+    for (; it != itEnd; ++it) {
+        if ((*it)->element() == _element)
+            return *it;
+    }
+
+    return 0;
+}
+
+
+// Merge every entry of _map into this map, summing the counts of elements
+// that are present in both.
+void
+ElementCountMap::add(ElementCountMap &_map)
+{
+    QList<ElementCount*>::ConstIterator        it    = _map.m_map.constBegin();
+    const QList<ElementCount*>::ConstIterator  itEnd = _map.m_map.constEnd();
+
+    // Step through _map and for each element, add it to the current one.
+    for (; it != itEnd; ++it) {
+        add((*it)->m_element, (*it)->m_count);
+    }
+
+}
+
+
+// Add _count atoms of _element: bump the existing entry if there is one,
+// otherwise append a newly allocated entry.
+void
+ElementCountMap::add(Element *_element, int _count)
+{
+    ElementCount  *elemCount;
+
+    elemCount = search(_element);
+    if (elemCount)
+        elemCount->m_count += _count;
+    else
+        m_map.append(new ElementCount(_element, _count));
+}
+
+
+// Multiply the count of every entry by _factor.
+void
+ElementCountMap::multiply(int _factor)
+{
+    Iterator  it    = begin();
+    Iterator  itEnd = end();
+
+    for (; it != itEnd; ++it)
+        (*it)->multiply(_factor);
+}
+
+
+// ================================================================
+//                    class MoleculeParser
+
+
+// Both constructors initialise m_error and m_elementVal, which were
+// previously left indeterminate until weight() or getNextToken() first
+// assigned them.
+MoleculeParser::MoleculeParser()
+    : Parser(),
+      m_error(false),
+      m_elementVal(0)
+{
+}
+
+
+MoleculeParser::MoleculeParser(const QString& _str)
+    : Parser(_str),
+      m_error(false),
+      m_elementVal(0)
+{
+}
+
+
+MoleculeParser::~MoleculeParser()
+{
+    //Parser::~Parser();
+}
+
+
+// ----------------------------------------------------------------
+//                            public methods
+
+
+// Try to parse the molecule and get the weight of it.
+//
+// This method also acts as the main loop.
+
+// Parses _moleculeString from scratch.  The total mass is written to
+// *_resultMass and the per-element counts to *_resultMap.
+//
+// Returns true only when the tokenizer has reached the end of the input
+// (nextToken() == -1) and m_error is still false afterwards.
+bool
+MoleculeParser::weight(QString          _moleculeString,
+                       double          *_resultMass,
+                       ElementCountMap *_resultMap)
+{
+    // Start from a clean slate.  m_error has to be reset before start(),
+    // because start() already fetches the first token.
+    m_error      = false;
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+    // Prime the tokenizer, then consume as many terms as possible.
+    start(_moleculeString);
+    parseSubmolecule(_resultMass, _resultMap);
+
+    // Leftover input and a flagged error are both failures.
+    const bool consumedEverything = (nextToken() == -1);
+    return consumedEverything && !m_error;
+}
+
+
+// ----------------------------------------------------------------
+//            helper methods for the public methods
+
+
+// Parse a submolecule, i.e. a (possibly empty) sequence of terms.
+//
+// The mass and composition of every successfully parsed term are summed
+// into *_resultMass and *_resultMap.  Always returns true; the loop simply
+// stops at the first token that does not start a term.
+
+bool
+MoleculeParser::parseSubmolecule(double          *_resultMass,
+                                 ElementCountMap *_resultMap)
+{
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+    double           termMass = 0.0;
+    ElementCountMap  termMap;
+
+    for (;;) {
+        if (!parseTerm(&termMass, &termMap))
+            break;
+
+        // Accumulate this term into the running totals.
+        *_resultMass += termMass;
+        _resultMap->add(termMap);
+    }
+
+    return true;
+}
+
+
+// Parse a term within the molecule, i.e. a single atom or a
+// submolecule within parenthesis followed by an optional number.
+// Examples: Bk, Mn2, (COOH)2
+//
+// Return true if correct, otherwise return false.
+
+// If correct, the mass of the term is returned in *_resultMass, and
+// the flattened composition of the molecule in *_resultMap.
+//
+// A "(" that is not closed by a matching ")" is recorded in m_error.
+// Without that, weight() would see end-of-input with m_error still false
+// and report a formula such as "(CO" as valid.
+
+bool
+MoleculeParser::parseTerm(double          *_resultMass,
+                          ElementCountMap *_resultMap)
+{
+    *_resultMass = 0.0;
+    _resultMap->clear();
+
+#if 0
+    kdDebug() << "parseTerm(): Next token =  "
+              << nextToken() << endl;
+#endif
+    if (nextToken() == ELEMENT_TOKEN) {
+        //kdDebug() << "Parsed an element: " << m_elementVal->symbol() << endl;
+        *_resultMass = m_elementVal->dataAsVariant( ChemicalDataObject::mass ).toDouble();
+        _resultMap->add(m_elementVal, 1);
+
+        getNextToken();
+    }
+
+    else if (nextToken() == '(') {
+        // A submolecule.
+
+        getNextToken();
+        parseSubmolecule(_resultMass, _resultMap);
+
+        // Must end in a ")".
+        if (nextToken() == ')') {
+            //kdDebug() << "Parsed a submolecule. weight = " << *_result << endl;
+            getNextToken();
+        }
+        else {
+            // Unbalanced parenthesis: flag it, otherwise the failure is
+            // lost once the caller's term loop stops.
+            m_error = true;
+            return false;
+        }
+    }
+    else
+        // Neither an element nor a list within ().
+        return false;
+
+    // Optional number.
+    if (nextToken() == INT_TOKEN) {
+        //kdDebug() << "Parsed a number: " << intVal() << endl;
+
+        *_resultMass *= intVal();
+        _resultMap->multiply(intVal());
+
+        getNextToken();
+    }
+
+    kdDebug() << "Weight of term = " << *_resultMass << endl;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+//                           protected methods
+
+
+// Extend Parser::getNextToken with elements.
+//
+// An upper-case letter, optionally followed by one lower-case letter, is
+// looked up as an element symbol.  A known symbol yields ELEMENT_TOKEN with
+// m_elementVal set; an unknown one yields -1 (lookupElement() has set
+// m_error by then).  Everything else is left to Parser::getNextToken().
+//
+// Whitespace is skipped first.  The base tokenizer already skips it in front
+// of numbers and other characters, so "H2 O" was accepted while "H O" was
+// not; skipping here makes the two consistent.
+
+int
+MoleculeParser::getNextToken()
+{
+    QString  elementName;
+
+    skipWhitespace();
+
+#if 0
+    kdDebug() << "getNextToken(): Next character = "
+              << nextChar() << endl;
+#endif
+
+    // Check if the token is an element name.
+    if ('A' <= nextChar() && nextChar() <= 'Z') {
+        elementName = char(nextChar());
+        getNextChar();
+
+        if ('a' <= nextChar() && nextChar() <= 'z') {
+            elementName.append(char(nextChar()));
+            getNextChar();
+        }
+
+        // Look up the element from the name.
+        m_elementVal = lookupElement(elementName);
+        if (m_elementVal)
+        {
+            m_nextToken = ELEMENT_TOKEN;
+        }
+        else
+            m_nextToken = -1;
+    }
+    else
+        return Parser::getNextToken();
+
+    return m_nextToken;
+}
+
+
+// ----------------------------------------------------------------
+//                          private methods
+
+
+// Return the element whose symbol equals _name, searching the element list
+// of KalziumDataObject linearly.  If there is no such element, set m_error
+// and return NULL.
+//
+// NOTE(review): the template arguments of QList were stripped when this
+// patch was extracted.  QList<Element*> is restored from usage: the list
+// entries are dereferenced with -> and returned as Element pointers.
+Element *
+MoleculeParser::lookupElement( const QString& _name )
+{
+    QList<Element*> elementList = KalziumDataObject::instance()->ElementList;
+
+    //kdDebug() << "looking up " << _name << endl;
+
+    QList<Element*>::ConstIterator        it  = elementList.constBegin();
+    const QList<Element*>::ConstIterator  end = elementList.constEnd();
+
+    for (; it != end; ++it) {
+        if ( (*it)->dataAsVariant(ChemicalDataObject::symbol) == _name ) {
+            kdDebug() << "Found element " << _name << endl;
+            return *it;
+        }
+    }
+
+    //if there is an error make m_error true.
+    m_error = true;
+
+    kdDebug() << k_funcinfo << "no such element, parsing error!: " << _name << endl;
+    return NULL;
+}
diff --git a/libscience/moleculeparser.h b/libscience/moleculeparser.h
new file mode 100644
index 0000000..2948dca
--- /dev/null
+++ b/libscience/moleculeparser.h
@@ -0,0 +1,139 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.
*
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef MOLECULEPARSER_H
+#define MOLECULEPARSER_H
+
+#include "element.h"
+#include "parser.h"
+
+// NOTE(review): the header names inside <...> on the next two #include lines
+// were stripped when this patch was extracted.  They are restored from the
+// two Qt containers this header uses -- TODO confirm against the repository.
+#include <QList>
+#include <QMap>
+
+
+/**
+ * @class ElementCount
+ *
+ * One entry of an ElementCountMap: a pointer to an element together with
+ * the number of atoms of that element.
+ */
+class ElementCount {
+ public:
+    ElementCount(Element *_element, int _count)
+    {
+        m_element = _element;
+        m_count   = _count;
+    }
+    ElementCount(Element *_element)
+    {
+        m_element = _element;
+        m_count   = 0;
+    }
+
+    // Defined inline: the destructor was declared without a definition in
+    // either file of this class, so destroying an ElementCount could not
+    // link.  The class only stores a pointer and a counter, so there is
+    // nothing to release.
+    ~ElementCount() {}
+
+    Element *element() const        { return m_element;   }
+    int      count()   const        { return m_count;     }
+    void     add(int _count)        { m_count += _count;  }
+    void     multiply(int _factor)  { m_count *= _factor; }
+
+    // Public because ElementCountMap::add() reads and writes them directly.
+    Element  *m_element;
+    int       m_count;
+};
+
+
+/**
+ * @class ElementCountMap
+ *
+ * The composition of a molecule as a list of ElementCount entries, with at
+ * most one entry per element pointer.
+ *
+ * NOTE(review): the template arguments of QList were stripped when this
+ * patch was extracted.  QList<ElementCount*> is restored from
+ * moleculeparser.cpp, which appends "new ElementCount(...)" to m_map.
+ */
+class ElementCountMap {
+ public:
+    ElementCountMap();
+    ~ElementCountMap();
+
+    /**
+     * Remove all entries.
+     *
+     * NOTE(review): this only empties the list of pointers.  The entries
+     * that add() allocates with new are not deleted here or in the
+     * destructor -- confirm the intended ownership.
+     */
+    void  clear()          { m_map.clear(); }
+
+    /** @return the entry for @p _element, or 0 if there is none */
+    ElementCount  *search(Element *_element);
+    /** Merge all entries of @p _map into this map. */
+    void           add(ElementCountMap &_map);
+    /** Add @p _count atoms of @p _element. */
+    void           add(Element *_element, int _count);
+    /** Multiply every count by @p _factor. */
+    void           multiply(int _factor);
+
+    typedef QList<ElementCount*>::Iterator  Iterator;
+    Iterator   begin() { return m_map.begin(); }
+    Iterator   end()   { return m_map.end();   }
+
+ private:
+    QList<ElementCount*>  m_map;
+};
+
+
+
+/**
+ * @class MoleculeParser
+ *
+ * Parse molecule formulas.
+ *
+ * Usage:
+ * @code
+ *   MoleculeParser   parser;
+ *   QString          chemical_formula = "C2H5OH";
+ *   double           weight;
+ *   ElementCountMap  composition;
+ *
+ *   if (parser.weight(chemical_formula, &weight, &composition))
+ *       cout << "Weight of " << chemical_formula << " = " << weight << ".\n";
+ *   else
+ *       cout << "Parse error\n";
+ * @endcode
+ *
+ * @author Inge Wallin
+ */
+class MoleculeParser : public Parser {
+
+public:
+    static const int  ELEMENT_TOKEN = 300;
+
+    MoleculeParser();
+    MoleculeParser( const QString& _str);
+    virtual ~MoleculeParser();
+
+    /**
+     * Try to parse the molecule @p _moleculeString and get the weight of it.
+     * The calculated weight is stored in @p _resultMass and the number of
+     * atoms of each element in @p _resultMap.  Both pointers are
+     * dereferenced unconditionally and must not be null.
+     *
+     * @return whether the parsing was successful or not
+     */
+    bool  weight(QString          _moleculeString,
+                 double          *_resultMass,
+                 ElementCountMap *_resultMap);
+ private:
+    // Helper functions
+    bool      parseSubmolecule(double          *_resultMass,
+                               ElementCountMap *_resultMap);
+    bool      parseTerm(double          *_resultMass,
+                        ElementCountMap *_resultMap);
+
+    Element  *lookupElement( const QString& _name );
+
+    // NOTE(review): the template arguments of this QMap were stripped when
+    // the patch was extracted and cannot be recovered from usage, because
+    // moleculeparser.cpp never touches the member.  <Element*, int> is an
+    // assumption -- TODO confirm against the repository.
+    QMap<Element*, int> m_elementMap;
+
+    //if this boolean is "true" the parser found an error
+    bool      m_error;
+
+protected:
+
+    /**
+     * Extends the standard tokenizer in Parser::getNextToken().
+     */
+    virtual int  getNextToken();
+
+private:
+    Element   *m_elementVal;    // Valid if m_nextToken == ELEMENT_TOKEN
+};
+
+#endif
diff --git a/libscience/parser.cpp b/libscience/parser.cpp
new file mode 100644
index 0000000..38b4683
--- /dev/null
+++ b/libscience/parser.cpp
@@ -0,0 +1,217 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.
*
+ *                                                                         *
+ ***************************************************************************/
+
+
+// NOTE(review): the header names inside <...> on the next two #include lines
+// were stripped when this patch was extracted.  <ctype.h> is required by the
+// isdigit() calls below; <kdebug.h> is an assumption based on the
+// commented-out kdDebug() call -- TODO confirm against the repository.
+#include <ctype.h>
+
+#include <kdebug.h>
+
+#include "parser.h"
+
+
+// Create a parser with no input; every peek function reports -1.
+Parser::Parser()
+{
+    start(QString::null);
+}
+
+
+// Create a parser and immediately start parsing _str.
+//
+// Note that start() fetches the first token through the virtual
+// getNextToken().  During construction of the Parser base that call always
+// resolves to Parser::getNextToken(), never to an override in a subclass.
+Parser::Parser(const QString& _str)
+{
+    start(_str);
+}
+
+
+Parser::~Parser()
+{
+}
+
+
+// Start a new parse of _str and fetch its first token.
+//
+// A null string puts the parser in the "end of input" state (index,
+// character and token all -1).
+void
+Parser::start(const QString& _str)
+{
+    m_str = _str;
+
+    // Give the token values a defined content until the first number has
+    // been parsed; they used to be left indeterminate.
+    m_intVal   = 0;
+    m_floatVal = 0.0;
+
+    if (_str.isNull()) {
+        m_index     = -1;
+        m_nextChar  = -1;
+        m_nextToken = -1;
+    }
+    else {
+        m_index    = 0;
+        // An empty (but non-null) string has no character at index 0, so
+        // at(0) must not be called for it.  A NUL character is used
+        // instead, which makes the first token 0 rather than -1.  This is
+        // deliberate: MoleculeParser::weight() treats any token other
+        // than -1 as a failure and therefore rejects empty input.
+        m_nextChar = m_str.isEmpty() ? 0 : m_str.at(0).toLatin1();
+        getNextToken();
+    }
+}
+
+
+// ----------------------------------------------------------------
+
+
+// Skip whitespace, and try to parse the following characters as an int.
+//
+// Return true if successful.  On failure *_result is left untouched.
+
+bool
+Parser::parseInt(int *_result)
+{
+    int  sign = 1;
+
+    skipWhitespace();
+
+    if (m_nextChar == '-') {
+        sign = -1;
+        getNextChar();
+    }
+
+    if (!isdigit(m_nextChar))
+        return false;
+
+    int  result = 0;
+    while (isdigit(m_nextChar)) {
+        result = result * 10 + (m_nextChar - '0');
+        getNextChar();
+    }
+
+    *_result = sign * result;
+    return true;
+}
+
+
+// Skip whitespace, and try to parse the following characters as a
+// simple float of the type -?[0-9]+'.'?[0-9]*
+//
+// Return true if successful.  On failure *_result is left untouched.
+
+bool
+Parser::parseSimpleFloat(double *_result)
+{
+    double  sign = 1.0;
+
+    skipWhitespace();
+    if (m_nextChar == '-') {
+        sign = -1.0;
+        getNextChar();
+    }
+
+    if (!isdigit(m_nextChar))
+        return false;
+
+    double  result = 0.0;
+
+    // The integer.
+    while (isdigit(m_nextChar)) {
+        result = result * 10.0 + (double) (m_nextChar - '0');
+        getNextChar();
+    }
+    *_result = result;
+
+    // No fraction.  A '.' that is not followed by a digit is still consumed
+    // by the getNextChar() call in the condition.
+    if (m_nextChar != '.' || !isdigit(getNextChar())) {
+        *_result = sign * result;
+        return true;
+    }
+
+    // The fraction.
+    double  decimal = 0.1;
+    while (isdigit(m_nextChar)) {
+        result += decimal * (double) (m_nextChar - '0');
+        decimal /= 10.0;
+        getNextChar();
+    }
+
+    *_result = sign * result;
+    return true;
+}
+
+
+// ----------------------------------------------------------------
+//                          protected methods
+
+
+// Advance to the next character and return it, or -1 at the end of the
+// input.  Once the end has been reached the parser stays there.
+
+int
+Parser::getNextChar()
+{
+    if (m_index == -1)
+        return -1;
+
+    // Advance first and test the bound afterwards.  The old code tested
+    // before incrementing, so stepping past the last character evaluated
+    // m_str.at(length), one position beyond the end of the string.
+    ++m_index;
+
+    // If end of string, then reset the parser.
+    if (m_index >= (int) m_str.length()) {
+        m_index    = -1;
+        m_nextChar = -1;
+    }
+    else
+        m_nextChar = m_str.at(m_index).toLatin1();
+
+    // Take care of null-terminated strings.
+    if (m_nextChar == 0) {
+        m_index    = -1;
+        m_nextChar = -1;
+    }
+
+    //kdDebug() << "Parser::getNextChar(): char = " << m_nextChar << endl;
+
+    return m_nextChar;
+}
+
+
+// Advance past any whitespace and return the first non-space character
+// (or -1 at the end of the input).
+
+int
+Parser::skipWhitespace()
+{
+    while (QChar(m_nextChar).isSpace())
+        getNextChar();
+
+    return m_nextChar;
+}
+
+
+// Get the next token.  This corresponds to the lexical analyzer of a
+// standard parser, e.g as generated by lex.
+//
+// This basic parser supports integers and simple
+// floats.  Reimplement this method to extend it.
+
+int
+Parser::getNextToken()
+{
+    // Remember the complete scanner position.  Both the index and the
+    // current character are needed to rewind consistently.
+    const int  saveIndex = m_index;
+    const int  saveChar  = m_nextChar;
+
+    skipWhitespace();
+    if (isdigit(nextChar())) {
+        // At this point we know that there is a valid number in the
+        // string.  The only question now, is whether it is an int or a
+        // float.
+
+        parseInt(&m_intVal);
+
+        skipWhitespace();
+        if (nextChar() == '.') {
+            // Rewind and parse the number again as a float.  The old code
+            // restored only m_index, so parseSimpleFloat() started on the
+            // '.', failed, and left m_floatVal unset under FLOAT_TOKEN.
+            m_index    = saveIndex;
+            m_nextChar = saveChar;
+
+            // No need to check since we already know it is correct.
+            (void) parseSimpleFloat(&m_floatVal);
+            m_nextToken = FLOAT_TOKEN;
+        }
+        else
+            m_nextToken = INT_TOKEN;
+    }
+
+    else if (nextChar() != -1) {
+        // Any character.
+        m_nextToken = nextChar();
+        getNextChar();
+    }
+
+    else
+        // End of string.
+        m_nextToken = -1;
+
+    return m_nextToken;
+}
diff --git a/libscience/parser.h b/libscience/parser.h
new file mode 100644
index 0000000..37355f8
--- /dev/null
+++ b/libscience/parser.h
@@ -0,0 +1,106 @@
+/***************************************************************************
+    copyright            : (C) 2005 by Inge Wallin
+    email                : inge@lysator.liu.se
+ ***************************************************************************/
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef PARSER_H
+#define PARSER_H
+
+// NOTE(review): the header name inside <...> was stripped when this patch
+// was extracted.  <QString> is restored because the class stores and accepts
+// QString -- TODO confirm against the repository.
+#include <QString>
+
+/**
+ * @class Parser
+ * This is a general purpose parser originally written by Inge Wallin.
+ *
+ * It is intended to be subclassed; see MoleculeParser.
+ *
+ * @author Inge Wallin
+ */
+class Parser {
+public:
+    // All characters are their own token value per default.
+    static const int  INT_TOKEN   = 257;
+    static const int  FLOAT_TOKEN = 258;
+    // Extend this list in your subclass to make a more advanced parser.
+
+    Parser();
+    Parser(const QString& _str);
+    virtual ~Parser();
+
+    /**
+     * Start a new parse.
+     */
+    void   start(const QString& _str);
+
+    /**
+     * Peek at the next character; -1 means end of input.
+     */
+    int    nextChar()  const { return m_nextChar;  }
+
+    /**
+     * Peek at the next token; -1 means end of input.
+     */
+    int    nextToken() const { return m_nextToken; }
+
+    /**
+     * Get the value stored for different types of tokens.
+     *
+     * NOTE(review): floatVal() returns float although m_floatVal is a
+     * double, so the value is narrowed on the way out.  The return type is
+     * kept as it is for compatibility with existing callers.
+     */
+    int    intVal()    const { return m_intVal;    }
+    float  floatVal()  const { return m_floatVal;  }
+
+private:
+
+    // Try to parse some special datatypes.
+    bool  parseInt(int *_result);
+    bool  parseSimpleFloat(double *_result);
+
+protected:
+
+    /**
+     * Make the next character the current one.
+     */
+    int  getNextChar();
+
+    /**
+     * Make the next non-space character the current one.
+     */
+    int  skipWhitespace();
+
+    /**
+     * Fetches the next token.
+     */
+    virtual int  getNextToken();
+
+private:
+    QString  m_str;        // The string being parsed
+    int      m_index;      // Position of m_nextChar in m_str, -1 at the end
+    int      m_nextChar;   // Current character, -1 at the end
+
+ protected:
+
+    // Lexical analysis and token handling.  These members need to be
+    // protected instead of private since we want to be able to
+    // reimplement getNextToken().
+
+    /**
+     * The next token to be used in the parser.
+     */
+    int      m_nextToken;
+
+    // Values for the respective token.  These could be made into a
+    // union, but I don't think it is necessary to bother, since they
+    // are so few and we don't instantiate a lot of copies of the
+    // parser.
+    int      m_intVal;     // Valid if m_nextToken == INT_TOKEN
+    double   m_floatVal;   // Valid if m_nextToken == FLOAT_TOKEN
+};
+
+#endif