From ac1d17724ebb33bf306b98055d1d110dd5ab71c8 Mon Sep 17 00:00:00 2001 From: Jeremy Paul Whiting Date: Sun, 8 Jul 2007 04:59:17 +0000 Subject: [PATCH] kvtml2 reader initial checkin, currently passes work on to kvtmlreader if it detects no version attribute, and only reads original expression and information section so far for kvtml2 files svn path=/trunk/KDE/kdeedu/libkdeedu/; revision=685157 --- kdeeducore/CMakeLists.txt | 1 + kdeeducore/keduvocdocument.cpp | 6 +- kdeeducore/keduvockvtml2reader.cpp | 1354 ++++++++++++++++++++++++++++ kdeeducore/keduvockvtml2reader.h | 117 +++ 4 files changed, 1475 insertions(+), 3 deletions(-) create mode 100644 kdeeducore/keduvockvtml2reader.cpp create mode 100644 kdeeducore/keduvockvtml2reader.h diff --git a/kdeeducore/CMakeLists.txt b/kdeeducore/CMakeLists.txt index ef30e0a..e06a138 100644 --- a/kdeeducore/CMakeLists.txt +++ b/kdeeducore/CMakeLists.txt @@ -10,6 +10,7 @@ set(kdeeducore_LIB_SRCS keduvocgrade.cpp keduvocgrammar.cpp keduvockvtmlreader.cpp + keduvockvtml2reader.cpp keduvockvtmlwriter.cpp keduvocmultiplechoice.cpp keduvocpaukerreader.cpp diff --git a/kdeeducore/keduvocdocument.cpp b/kdeeducore/keduvocdocument.cpp index 3049619..5fa0a99 100644 --- a/kdeeducore/keduvocdocument.cpp +++ b/kdeeducore/keduvocdocument.cpp @@ -34,7 +34,7 @@ #include "keduvockvtmlwriter.h" #include "keduvoccsvreader.h" #include "keduvoccsvwriter.h" -#include "keduvockvtmlreader.h" +#include "keduvockvtml2reader.h" #include "keduvocwqlreader.h" #include "keduvocpaukerreader.h" #include "keduvocvokabelnreader.h" @@ -229,7 +229,7 @@ bool KEduVocDocument::open(const KUrl& url) switch (ft) { case kvtml: { - KEduVocKvtmlReader kvtmlReader(f); + KEduVocKvtml2Reader kvtmlReader(f); read = kvtmlReader.readDoc(this); if (!read) errorMessage = kvtmlReader.errorMessage(); @@ -287,7 +287,7 @@ bool KEduVocDocument::open(const KUrl& url) default: { - KEduVocKvtmlReader kvtmlReader(f); + KEduVocKvtml2Reader kvtmlReader(f); read = kvtmlReader.readDoc(this); 
if (!read) errorMessage = kvtmlReader.errorMessage(); diff --git a/kdeeducore/keduvockvtml2reader.cpp b/kdeeducore/keduvockvtml2reader.cpp new file mode 100644 index 0000000..75fc95d --- /dev/null +++ b/kdeeducore/keduvockvtml2reader.cpp @@ -0,0 +1,1354 @@ +/*************************************************************************** + read a KEduVocDocument from a KVTML file + ----------------------------------------------------------------------- + copyright : (C) 1999-2001 Ewald Arnold + (C) 2001 The KDE-EDU team + (C) 2005 Eric Pignet + (C) 2007 Peter Hedlund + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "keduvockvtml2reader.h" + +#include +#include +#include + +#include +#include + +#include "keduvocdocument.h" +#include "kvtml2defs.h" +#include "kvtmldefs.h" +#include "keduvockvtmlreader.h" + +KEduVocKvtml2Reader::KEduVocKvtml2Reader(QIODevice *file) +{ + // the file must be already open + m_inputFile = file; + m_errorMessage = ""; +} + + +bool KEduVocKvtml2Reader::readDoc(KEduVocDocument *doc) +{ + m_doc = doc; + m_cols = 0; + m_lines = 0; + + QDomDocument domDoc("KEduVocDocument"); + + if (!domDoc.setContent(m_inputFile, &m_errorMessage)) + return false; + + QDomElement domElementKvtml = domDoc.documentElement(); + if (domElementKvtml.tagName() != KVTML_TAG) + { + m_errorMessage = i18n("This is not a KDE Vocabulary document."); + return false; + } + + if (domElementKvtml.attribute(KVTML_VERSION).toFloat() < 2.0) + { + // read the file with the old format + + // first reset the file to 
the beginning + m_inputFile->seek(0); + KEduVocKvtmlReader oldFormat(m_inputFile); + + // get the return value + bool retval = oldFormat.readDoc(doc); + + // pass the errormessage up + m_errorMessage = oldFormat.errorMessage(); + return retval; + } + + //------------------------------------------------------------------------- + // Information + //------------------------------------------------------------------------- + + QDomElement info = domElementKvtml.firstChildElement(KVTML_INFORMATION); + if (!info.isNull()) + { + // read the generator + QDomElement generator = info.firstChildElement(KVTML_GENERATOR); + if (!generator.isNull()) + { + m_doc->setGenerator(generator.text()); + // add the version if it's there + int pos = m_doc->generator().lastIndexOf(KVD_VERS_PREFIX); + if (pos >= 0) + { + m_doc->setVersion(m_doc->generator().remove(0, pos + 2)); + } + } + + // read the title + QDomElement title = info.firstChildElement(KVTML_TITLE); + if (!title.isNull()) + { + m_doc->setTitle(title.text()); + } + + // read the comment + QDomElement comment = info.firstChildElement(KVTML_COMMENT); + if (!comment.isNull()) + { + m_doc->setDocumentRemark(comment.text()); + } + + // read the author + QDomElement author = info.firstChildElement(KVTML_AUTHOR); + if (!author.isNull()) + { + m_doc->setAuthor(author.text()); + } + + // read the license + QDomElement license = info.firstChildElement(KVTML_LICENSE); + if (!license.isNull()) + { + m_doc->setLicense(license.text()); + } + } + + + // possibly add lines support to information section of kvtml2 dtd? 
+ //documentAttribute = domElementKvtml.attributeNode(KV_LINES); + //if (!documentAttribute.isNull()) + // m_lines = documentAttribute.value().toInt(); + + //------------------------------------------------------------------------- + // Children + //------------------------------------------------------------------------- + + bool result = readBody(domElementKvtml); // read vocabulary + + return result; +} + + +bool KEduVocKvtml2Reader::readBody(QDomElement &domElementParent) +{ + bool result = false; + + QDomElement identifiers = domElementParent.firstChildElement(KVTML_IDENTIFIERS); + QDomElement currentElement; + + if (!identifiers.isNull()) + { + QDomNodeList entryList = identifiers.elementsByTagName(KVTML_IDENTIFIER); + if (entryList.length() <= 0) + return false; + + for (int i = 0; i < entryList.count(); ++i) + { + currentElement = entryList.item(i).toElement(); + if (currentElement.parentNode() == identifiers) + { + result = readExpression(currentElement); + if (!result) + return false; + } + } + } + + // old code for kvtml + //QDomElement currentElement; + + //currentElement = domElementParent.firstChildElement(KV_LESS_GRP); + //if (!currentElement.isNull()) { + // result = readLesson(currentElement); + // if (!result) + // return false; + //} + + //currentElement = domElementParent.firstChildElement(KV_ARTICLE_GRP); + //if (!currentElement.isNull()) { + // result = readArticle(currentElement); + // if (!result) + // return false; + //} + + //currentElement = domElementParent.firstChildElement(KV_CONJUG_GRP); + //if (!currentElement.isNull()) { + // QList conjugations; + // result = readConjug(currentElement, conjugations); + // if (result) { + // KEduVocConjugation conjug; + // for (int i = 0; i< conjugations.count(); i++) { + // conjug = conjugations[i]; + // m_doc->setConjugation(i, conjug); + // } + // } + // else + // return false; + //} + + //currentElement = domElementParent.firstChildElement(KV_OPTION_GRP); + //if (!currentElement.isNull()) { + // 
result = readOptions(currentElement); + // if (!result) + // return false; + //} + + //currentElement = domElementParent.firstChildElement(KV_TYPE_GRP); + //if (!currentElement.isNull()) { + // result = readType(currentElement); + // if (!result) + // return false; + //} + + //currentElement = domElementParent.firstChildElement(KV_TENSE_GRP); + //if (!currentElement.isNull()) { + // result = readTense(currentElement); + // if (!result) + // return false; + //} + + //currentElement = domElementParent.firstChildElement(KV_USAGE_GRP); + //if (!currentElement.isNull()) { + // result = readUsage(currentElement); + // if (!result) + // return false; + //} + + return result; +} + + +bool KEduVocKvtml2Reader::readLesson(QDomElement &domElementParent) +{ + QString s; + QStringList descriptions; + QDomAttr attribute; + QDomElement currentElement; + + //------------------------------------------------------------------------- + // Attributes + //------------------------------------------------------------------------- + + attribute = domElementParent.attributeNode(KV_SIZEHINT); + if (!attribute.isNull()) + m_doc->setSizeHint(-1, attribute.value().toInt()); + + //------------------------------------------------------------------------- + // Children + //------------------------------------------------------------------------- + + QDomNodeList entryList = domElementParent.elementsByTagName(KV_LESS_DESC); + if (entryList.length() <= 0) + return false; + + descriptions.clear(); + QList inQueryList; + inQueryList.clear(); + + for (int i = 0; i < entryList.count(); ++i) { + currentElement = entryList.item(i).toElement(); + if (currentElement.parentNode() == domElementParent) { + int no = 0; + bool isCurr = false; + + attribute = currentElement.attributeNode(KV_LESS_NO); + if (!attribute.isNull()) + no = attribute.value().toInt(); + + attribute = currentElement.attributeNode(KV_LESS_CURR); + if (!attribute.isNull()) + isCurr = attribute.value().toInt() != 0; + + if (isCurr && no != 
0) + m_doc->setCurrentLesson(no); + + attribute = currentElement.attributeNode(KV_LESS_QUERY); + if (!attribute.isNull()) + if (attribute.value().toInt() != 0 && no > 0) + inQueryList.append(no); + + s = currentElement.text(); + if (s.isNull()) + s = ""; + descriptions.append(s); + } + } + + if (inQueryList.count() > 0) + m_doc->setLessonsInQuery(inQueryList); + m_doc->setLessonDescriptions(descriptions); + + return true; +} + + +bool KEduVocKvtml2Reader::readArticle(QDomElement &domElementParent) +/* +
+ lang determines also lang order in entries !! + eine which must NOT differ + die + ein + der + ein + das + +
+*/ +{ + + QString s; + QDomAttr attribute; + QDomElement currentElement; + QDomElement article; + + QDomNodeList entryList = domElementParent.elementsByTagName(KV_ART_ENTRY); + if (entryList.length() <= 0) + return false; + + for (int i = 0; i < entryList.count(); ++i) { + +kDebug() << "KEduVocKvtml2Reader::readArticle() read " << entryList.count() << " articles. " << endl; + currentElement = entryList.item(i).toElement(); + if (currentElement.parentNode() == domElementParent) { + QString lang; + attribute = currentElement.attributeNode(KV_LANG); + + if (m_doc->identifierCount() <= i) + { + // first entry + if (!attribute.isNull()) // no definition in first entry + lang = attribute.value(); + else + lang = "original"; + m_doc->appendIdentifier(lang); +kDebug() << " Identifier " << i << " is " << lang << endl; + } + else + { + if (!attribute.isNull() && attribute.value() != m_doc->identifier(i)) + { + // different originals ? + m_errorMessage = i18n("Ambiguous definition of language code"); + return false; + } + } + + //--------- + // Children + + QString fem_def = ""; + QString mal_def = ""; + QString nat_def = ""; + QString fem_indef = ""; + QString mal_indef = ""; + QString nat_indef = ""; + + article = currentElement.firstChildElement(KV_ART_FD); + if (!article.isNull()) { + fem_def = article.text(); + if (fem_def.isNull()) + fem_def = ""; + } + + article = currentElement.firstChildElement(KV_ART_FI); + if (!article.isNull()) { + fem_indef = article.text(); + if (fem_indef.isNull()) + fem_indef = ""; + } + + article = currentElement.firstChildElement(KV_ART_MD); + if (!article.isNull()) { + mal_def = article.text(); + if (mal_def.isNull()) + mal_def = ""; + } + + article = currentElement.firstChildElement(KV_ART_MI); + if (!article.isNull()) { + mal_indef = article.text(); + if (mal_indef.isNull()) + mal_indef = ""; + } + + article = currentElement.firstChildElement(KV_ART_ND); + if (!article.isNull()) { + nat_def = article.text(); + if (nat_def.isNull()) + 
nat_def = ""; + } + + article = currentElement.firstChildElement(KV_ART_NI); + if (!article.isNull()) { + nat_indef = article.text(); + if (nat_indef.isNull()) + nat_indef = ""; + } + + m_doc->setArticle(i, KEduVocArticle(fem_def, fem_indef, mal_def, mal_indef, nat_def, nat_indef)); + } + } + + return true; +} + + +bool KEduVocKvtml2Reader::readConjug(QDomElement &domElementParent, QList &curr_conjug) +/* + used in header for definiton of "prefix" + lang determines also lang order in entries !! + I which must NOT differ + you<2> + he + she + it + we + you + they + they + they + + + + and in entry for definition of tenses of (irreg.) verbs + + go + go + goes + goes + goes + go + go + go + go + go + + +*/ +{ + QString s; + bool p3_common; + bool s3_common; + QString pers1_sing; + QString pers2_sing; + QString pers3_m_sing; + QString pers3_f_sing; + QString pers3_n_sing; + QString pers1_plur; + QString pers2_plur; + QString pers3_m_plur; + QString pers3_f_plur; + QString pers3_n_plur; + QString lang; + QString type; + int count = 0; + + curr_conjug.clear(); + curr_conjug.append(KEduVocConjugation()); + + QDomElement domElementConjugChild = domElementParent.firstChild().toElement(); + while (!domElementConjugChild.isNull()) + { + if (domElementConjugChild.tagName() == KV_CON_ENTRY) + { + type = CONJ_PREFIX; + + //---------- + // Attribute + + QString lang; + QDomAttr domAttrLang = domElementConjugChild.attributeNode(KV_LANG); + + if (m_doc->identifierCount() <= count) + { + // first entry + if (!domAttrLang.isNull()) // no definition in first entry + lang = domAttrLang.value(); + else + lang = "original"; + m_doc->appendIdentifier(lang); + } + else + { + if (!domAttrLang.isNull() && domAttrLang.value() != m_doc->identifier(count)) + { + // different originals ? 
+ m_errorMessage = i18n("Ambiguous definition of language code"); + return false; + } + } + } + else if (domElementConjugChild.tagName() == KV_CON_TYPE) + { + //---------- + // Attribute + + QDomAttr domAttrLang = domElementConjugChild.attributeNode(KV_CON_NAME); + type = domAttrLang.value(); + if (type.isNull()) + type = ""; + + if (type.length() != 0 && type.left(1) == UL_USER_TENSE) + { + int num = qMin(type.mid (1, 40).toInt(), 1000); // paranoia check + if (num > m_doc->tenseDescriptions().count()) + { + // description missing ? + QString s; + QStringList sl = m_doc->tenseDescriptions(); + for (int i = m_doc->tenseDescriptions().count(); i < num; i++) + { + s.setNum(i + 1); + s.prepend("#"); // invent descr according to number + sl.append(s); + } + m_doc->setTenseDescriptions(sl); + } + } + } + + pers1_sing = ""; + pers2_sing = ""; + pers3_m_sing = ""; + pers3_f_sing = ""; + pers3_n_sing = ""; + pers1_plur = ""; + pers2_plur = ""; + pers3_m_plur = ""; + pers3_f_plur = ""; + pers3_n_plur = ""; + p3_common = false; + s3_common = false; + + QDomElement domElementConjugGrandChild = domElementConjugChild.firstChild().toElement(); + while (!domElementConjugGrandChild.isNull()) + { + if (domElementConjugGrandChild.tagName() == KV_CON_P1S) + { + pers1_sing = domElementConjugGrandChild.text(); + if (pers1_sing.isNull()) + pers1_sing = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P2S) + { + pers2_sing = domElementConjugGrandChild.text(); + if (pers2_sing.isNull()) + pers2_sing = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P3SF) + { + QDomAttr domAttrCommon = domElementConjugGrandChild.attributeNode(KV_CONJ_COMMON); + if (!domAttrCommon.isNull()) + s3_common = domAttrCommon.value().toInt(); // returns 0 if the conversion fails + + pers3_f_sing = domElementConjugGrandChild.text(); + if (pers3_f_sing.isNull()) + pers3_f_sing = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P3SM) + { + pers3_m_sing = 
domElementConjugGrandChild.text(); + if (pers3_m_sing.isNull()) + pers3_m_sing = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P3SN) + { + pers3_n_sing = domElementConjugGrandChild.text(); + if (pers3_n_sing.isNull()) + pers3_n_sing = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P1P) + { + pers1_plur = domElementConjugGrandChild.text(); + if (pers1_plur.isNull()) + pers1_plur = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P2P) + { + pers2_plur = domElementConjugGrandChild.text(); + if (pers2_plur.isNull()) + pers2_plur = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P3PF) + { + QDomAttr domAttrCommon = domElementConjugGrandChild.attributeNode(KV_CONJ_COMMON); + if (!domAttrCommon.isNull()) + p3_common = domAttrCommon.value().toInt(); // returns 0 if the conversion fails + + pers3_f_plur = domElementConjugGrandChild.text(); + if (pers3_f_plur.isNull()) + pers3_f_plur = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P3PM) + { + pers3_m_plur = domElementConjugGrandChild.text(); + if (pers3_m_plur.isNull()) + pers3_m_plur = ""; + } + else if (domElementConjugGrandChild.tagName() == KV_CON_P3PN) + { + pers3_n_plur = domElementConjugGrandChild.text(); + if (pers3_n_plur.isNull()) + pers3_n_plur = ""; + } + else + { + return false; + } + + domElementConjugGrandChild = domElementConjugGrandChild.nextSibling().toElement(); + } + + if (domElementConjugChild.tagName() == KV_CON_ENTRY) + while (count + 1 > (int) curr_conjug.size() ) + curr_conjug.append(KEduVocConjugation()); + + curr_conjug[count].setPers3SingularCommon(type, s3_common); + curr_conjug[count].setPers3PluralCommon(type, p3_common); + curr_conjug[count].setPers1Singular(type, pers1_sing); + curr_conjug[count].setPers2Singular(type, pers2_sing); + curr_conjug[count].setPers3FemaleSingular(type, pers3_f_sing); + curr_conjug[count].setPers3MaleSingular(type, pers3_m_sing); + 
curr_conjug[count].setPers3NaturalSingular(type, pers3_n_sing); + curr_conjug[count].setPers1Plural(type, pers1_plur); + curr_conjug[count].setPers2Plural(type, pers2_plur); + curr_conjug[count].setPers3FemalePlural(type, pers3_f_plur); + curr_conjug[count].setPers3MalePlural(type, pers3_m_plur); + curr_conjug[count].setPers3NaturalPlural(type, pers3_n_plur); + + if (domElementConjugChild.tagName() == KV_CON_ENTRY) + count++; + + domElementConjugChild = domElementConjugChild.nextSibling().toElement(); + } + + return true; +} + + +bool KEduVocKvtml2Reader::readOptions(QDomElement &domElementParent) +{ + m_doc->setSortingEnabled(true); + QDomElement currentElement = domElementParent.firstChildElement(KV_OPT_SORT); + if (!currentElement.isNull()) { + QDomAttr attribute = currentElement.attributeNode(KV_BOOL_FLAG); + if (!attribute.isNull()) + { + bool ok = true; + m_doc->setSortingEnabled(attribute.value().toInt(&ok)); // returns 0 if the conversion fails + if (!ok) + m_doc->setSortingEnabled(true); + } + } + + return true; +} + + +bool KEduVocKvtml2Reader::readType(QDomElement &domElementParent) +{ + QString s; + QDomElement currentElement; + QStringList descriptions; + + QDomNodeList entryList = domElementParent.elementsByTagName(KV_TYPE_DESC); + if (entryList.length() <= 0) + return false; + + descriptions.clear(); + + for (int i = 0; i < entryList.count(); ++i) { + currentElement = entryList.item(i).toElement(); + if (currentElement.parentNode() == domElementParent) { + int no = 0; + + QDomAttr attribute = currentElement.attributeNode(KV_TYPE_NO); + if (!attribute.isNull()) + no = attribute.value().toInt(); + + s = currentElement.text(); + if (s.isNull()) + s = ""; + descriptions.append(s); + } + } + + m_doc->setTypeDescriptions(descriptions); + return true; +} + + +bool KEduVocKvtml2Reader::readTense(QDomElement &domElementParent) +{ + QString s; + QDomElement currentElement; + QStringList descriptions; + + QDomNodeList entryList = 
domElementParent.elementsByTagName(KV_TENSE_DESC); + if (entryList.length() <= 0) + return false; + + descriptions.clear(); + + for (int i = 0; i < entryList.count(); ++i) { + currentElement = entryList.item(i).toElement(); + if (currentElement.parentNode() == domElementParent) { + int no = 0; + + QDomAttr attribute = currentElement.attributeNode(KV_TENSE_NO); + if (!attribute.isNull()) + no = attribute.value().toInt(); + + s = currentElement.text(); + if (s.isNull()) + s = ""; + descriptions.append(s); + } + } + + m_doc->setTenseDescriptions(descriptions); + return true; +} + + +bool KEduVocKvtml2Reader::readUsage(QDomElement &domElementParent) +{ + QString s; + QDomElement currentElement; + QStringList descriptions; + + QDomNodeList entryList = domElementParent.elementsByTagName(KV_USAGE_DESC); + if (entryList.length() <= 0) + return false; + + descriptions.clear(); + + for (int i = 0; i < entryList.count(); ++i) { + currentElement = entryList.item(i).toElement(); + if (currentElement.parentNode() == domElementParent) { + int no = 0; + + QDomAttr attribute = currentElement.attributeNode(KV_USAGE_NO); + if (!attribute.isNull()) + no = attribute.value().toInt(); + + s = currentElement.text(); + if (s.isNull()) + s = ""; + descriptions.append(s); + } + } + + m_doc->setUsageDescriptions(descriptions); + return true; +} + + +bool KEduVocKvtml2Reader::readComparison(QDomElement &domElementParent, KEduVocComparison &comp) +/* + + good + better + best + +*/ +{ + QString s; + comp.clear(); + + QDomElement currentElement; + + currentElement = domElementParent.firstChildElement(KV_COMP_L1); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + comp.setL1(s); + } + + currentElement = domElementParent.firstChildElement(KV_COMP_L2); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + comp.setL2(s); + } + + currentElement = domElementParent.firstChildElement(KV_COMP_L3); + if 
(!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + comp.setL3(s); + } + return true; +} + + +bool KEduVocKvtml2Reader::readMultipleChoice(QDomElement &domElementParent, KEduVocMultipleChoice &mc) +/* + + good + better + best + best 2 + best 3 + +*/ + +{ + QString s; + mc.clear(); + + QDomElement currentElement; + + currentElement = domElementParent.firstChildElement(KV_MC_1); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + mc.setMC1(s); + } + + currentElement = domElementParent.firstChildElement(KV_MC_2); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + mc.setMC2(s); + } + + currentElement = domElementParent.firstChildElement(KV_MC_3); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + mc.setMC3(s); + } + + currentElement = domElementParent.firstChildElement(KV_MC_4); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + mc.setMC4(s); + } + + currentElement = domElementParent.firstChildElement(KV_MC_5); + if (!currentElement.isNull()) { + s = currentElement.text(); + if (s.isNull()) + s = ""; + mc.setMC5(s); + } + + mc.normalize(); + return true; +} + + +bool KEduVocKvtml2Reader::readExpressionChildAttributes( QDomElement &domElementExpressionChild, + QString &lang, + grade_t &grade, grade_t &rev_grade, + int &count, int &rev_count, + QDateTime &date, QDateTime &rev_date, + QString &remark, + int &bcount, int &rev_bcount, + QString &query_id, + QString &pronunciation, + int &width, + QString &type, + QString &faux_ami_f, + QString &faux_ami_t, + QString &synonym, + QString &example, + QString &antonym, + QString &usage, + QString ¶phrase) +{ + int pos; + QDomAttr attribute; + + lang = ""; + attribute = domElementExpressionChild.attributeNode(KV_LANG); + if (!attribute.isNull()) + lang = attribute.value(); + + width = -1; + attribute = 
domElementExpressionChild.attributeNode(KV_SIZEHINT); + if (!attribute.isNull()) + width = attribute.value().toInt(); + + grade = KV_NORM_GRADE; + rev_grade = KV_NORM_GRADE; + attribute = domElementExpressionChild.attributeNode(KV_GRADE); + if (!attribute.isNull()) + { + QString s = attribute.value(); + if ((pos = s.indexOf(';')) >= 1) + { + grade = s.left(pos).toInt(); + rev_grade = s.mid(pos + 1, s.length()).toInt(); + } + else + grade = s.toInt(); + } + + count = 0; + rev_count = 0; + attribute = domElementExpressionChild.attributeNode(KV_COUNT); + if (!attribute.isNull()) + { + QString s = attribute.value(); + if ((pos = s.indexOf(';')) >= 1) + { + count = s.left(pos).toInt(); + rev_count = s.mid(pos + 1, s.length()).toInt(); + } + else + count = s.toInt(); + } + + bcount = 0; + rev_bcount = 0; + attribute = domElementExpressionChild.attributeNode(KV_BAD); + if (!attribute.isNull()) + { + QString s = attribute.value(); + if ((pos = s.indexOf(';')) >= 1) + { + bcount = s.left(pos).toInt(); + rev_bcount = s.mid(pos + 1, s.length()).toInt(); + } + else + bcount = s.toInt(); + } + + date.setTime_t(0); + rev_date.setTime_t(0); + attribute = domElementExpressionChild.attributeNode(KV_DATE); + if (!attribute.isNull()) + { + QString s = attribute.value(); + if ((pos = s.indexOf(';')) >= 1) + { + date.setTime_t(s.left(pos).toInt()); + rev_date.setTime_t(s.mid(pos + 1, s.length()).toInt()); + } + else + date.setTime_t(s.toInt()); + } + + attribute = domElementExpressionChild.attributeNode(KV_DATE2); + if (!attribute.isNull()) + { + //this format is deprecated and ignored. 
+ } + + remark = ""; + attribute = domElementExpressionChild.attributeNode(KV_REMARK); + if (!attribute.isNull()) + remark = attribute.value(); + + faux_ami_f = ""; + attribute = domElementExpressionChild.attributeNode(KV_FAUX_AMI_F); + if (!attribute.isNull()) + faux_ami_f = attribute.value(); + + faux_ami_t = ""; + attribute = domElementExpressionChild.attributeNode(KV_FAUX_AMI_T); + if (!attribute.isNull()) + faux_ami_t = attribute.value(); + + synonym = ""; + attribute = domElementExpressionChild.attributeNode(KV_SYNONYM); + if (!attribute.isNull()) + synonym = attribute.value(); + + example = ""; + attribute = domElementExpressionChild.attributeNode(KV_EXAMPLE); + if (!attribute.isNull()) + example = attribute.value(); + + usage = ""; + attribute = domElementExpressionChild.attributeNode(KV_USAGE); + if (!attribute.isNull()) + { + usage = attribute.value(); + if (usage.length() != 0 && usage.left(1) == UL_USER_USAGE) + { + int num = qMin(usage.mid (1, 40).toInt(), 1000); // paranioa check + if (num > m_doc->usageDescriptions().count()) + { + // description missing ? 
+ QStringList sl = m_doc->usageDescriptions(); + QString s; + for (int i = m_doc->usageDescriptions().count(); i < num; i++) + { + s.setNum(i + 1); + s.prepend("#"); // invent descr according to number + sl.append(s); + } + m_doc->setUsageDescriptions(sl); + } + } + } + + paraphrase = ""; + attribute = domElementExpressionChild.attributeNode(KV_PARAPHRASE); + if (!attribute.isNull()) + paraphrase = attribute.value(); + + antonym = ""; + attribute = domElementExpressionChild.attributeNode(KV_ANTONYM); + if (!attribute.isNull()) + antonym = attribute.value(); + + attribute = domElementExpressionChild.attributeNode(KV_EXPRTYPE); + if (!attribute.isNull()) + { + type = attribute.value(); + if (type == "1") + type = QM_VERB; + else if (type == "2") // convert from pre-0.5 versions + type = QM_NOUN; + else if (type == "3") + type = QM_NAME; + + if (type.length() != 0 && type.left(1) == QM_USER_TYPE) + { + int num = qMin(type.mid (1, 40).toInt(), 1000); // paranoia check + if (num > m_doc->typeDescriptions().count()) + { + // description missing ? 
+ QString s; + QStringList sl = m_doc->typeDescriptions(); + for (int i = m_doc->typeDescriptions().count(); i < num; i++) + { + s.setNum(i + 1); + s.prepend("#"); // invent descr according to number + sl.append(s); + } + m_doc->setTypeDescriptions(sl); + } + } + } + + pronunciation = ""; + attribute = domElementExpressionChild.attributeNode(KV_PRONUNCE); + if (!attribute.isNull()) + pronunciation = attribute.value(); + + query_id = ""; + attribute = domElementExpressionChild.attributeNode(KV_QUERY); + if (!attribute.isNull()) + query_id = attribute.value(); + + return true; +} + + +bool KEduVocKvtml2Reader::readExpression(QDomElement &expressionElement) +{ + //grade_t grade; + //grade_t r_grade; + //int qcount; + //int r_qcount; + //int bcount; + //int r_bcount; + QString comment; + //QString pronunciation; + //QDateTime qdate; + //QDateTime r_qdate; + //bool inquery; + //bool active; + //QString lang; + QString textstr; + //QString exprtype; + //QString q_org; + //QString q_trans; + //QString query_id; + //int lesson = 0; + //int width; + //QString type; + //QString faux_ami_f; + //QString faux_ami_t; + //QString synonym; + //QString example; + //QString antonym; + //QString usage; + //QString paraphrase; + //QList conjug; + //KEduVocComparison comparison; + //KEduVocMultipleChoice mc; + KEduVocExpression expr; + + QDomAttr attribute; + QDomElement currentElement; + QDomElement currentChild; + + //------------------------------------------------------------------------- + // Attributes + //------------------------------------------------------------------------- + + //attribute = domElementParent.attributeNode(KV_LESS_MEMBER); + //if (!attribute.isNull()) + // lesson = attribute.value().toInt(); + + //if (lesson && lesson > m_doc->lessonDescriptions().count()) + //{ + // // description missing ? 
+ // QString s; + // QStringList sl = m_doc->lessonDescriptions(); + // for (int i = m_doc->lessonDescriptions().count(); i < lesson; i++) + // { + // s.setNum(i + 1); + // s.prepend("#"); //create descriptions from number + // sl.append(s); + // } + // m_doc->setLessonDescriptions(sl); + //} + + //attribute = domElementParent.attributeNode(KV_SELECTED); + //if (!attribute.isNull()) + // inquery = attribute.value() == "1" ? true : false; + //else + // inquery = false; + + //attribute = domElementParent.attributeNode(KV_INACTIVE); + //if (!attribute.isNull()) + // active = attribute.value() == "1" ? false : true; + //else + // active = true; + + //attribute = domElementParent.attributeNode(KV_EXPRTYPE); + //if (!attribute.isNull()) + //{ + // exprtype = attribute.value(); + // if (exprtype == "1") + // exprtype = QM_VERB; + // else if (exprtype == "2") // convert from pre-0.5 versions + // exprtype = QM_NOUN; + // else if (exprtype == "3") + // exprtype = QM_NAME; + + // if (exprtype.length() != 0 && exprtype.left(1) == QM_USER_TYPE) + // { + // int num = qMin(exprtype.mid(1, 40).toInt(), 1000); // paranoia check + // if (num > m_doc->typeDescriptions().count()) + // { + // // description missing ? 
+ // QString s; + // QStringList sl = m_doc->typeDescriptions(); + // for (int i = m_doc->typeDescriptions().count(); i < num; i++) + // { + // s.setNum(i + 1); + // s.prepend("#"); // invent descr according to number + // sl.append(s); + // } + // m_doc->setTypeDescriptions(sl); + // } + // } + //} + + //------------------------------------------------------------------------- + // Children 'Translation' + //------------------------------------------------------------------------- + + //QDomNodeList translationList = domElementParent.elementsByTagName(KV_TRANS); + + // count which translation we are on + int i=0; + + currentElement = expressionElement.firstChildElement(KVTML_NAME); + if (!currentElement.isNull()) + { + expr = KEduVocExpression(currentElement.text()); + } + + currentElement = expressionElement.firstChildElement(KVTML_COMMENT); + if (!currentElement.isNull()) + { + expr.translation(0).setComment(currentElement.text()); + } + + currentElement = expressionElement.firstChildElement(KVTML_TYPE); + if (!currentElement.isNull()) + { + expr.translation(0).setType(currentElement.text()); + } + + currentElement = expressionElement.firstChildElement(KVTML_LOCALE); + if (!currentElement.isNull()) + { + } + + // kvtml 1: we always have an original element (required) +// currentElement = domElementParent.firstChildElement(KV_ORG); +// if (currentElement.isNull()) { // sanity check +// m_errorMessage = i18n("Data for original language missing"); +// return false; +// } + +// while (!currentElement.isNull()) { +// type = exprtype; // seems like type can be in the parent element and overwritten in the children here :( + +// //----------- +// // Attributes +// //----------- + +// // read attributes - the order of the query grades is interchanged! 
+// if (i == 0 && !readExpressionChildAttributes( currentElement, lang, grade, r_grade, qcount, r_qcount, qdate, r_qdate, remark, bcount, r_bcount, query_id, +// pronunciation, width, type, faux_ami_t, faux_ami_f, synonym, example, antonym, usage, paraphrase)) +// return false; + +// if (i != 0 && !readExpressionChildAttributes( currentElement, lang, grade, r_grade, qcount, r_qcount, qdate, r_qdate, remark, bcount, r_bcount, query_id, +// pronunciation, width, type, faux_ami_f, faux_ami_t, synonym, example, antonym, usage, paraphrase)) +// return false; + +// if (m_doc->entryCount() == 0) +// { +// // only accept in first entry +// if (width >= 0) +// m_doc->setSizeHint(i, width); + +// if (query_id == KV_O) +// q_org = lang; + +// if (query_id == KV_T) + +// q_trans = lang; +// } +////kDebug() << " TranslationList.count(): " << translationList.count() << " Entry count: " << m_doc->entryCount() << endl; +// if (m_doc->entryCount() == 0) { // this is because in kvtml the languages are saved in the FIRST ENTRY ONLY. +//kDebug() << " Read Expression with identifiers: " << i << endl; +// // new translation +// if (lang.isEmpty()) { +// if (i == 0) { +// lang = "original"; +// } else { +// // no definition in first entry ? +// lang.setNum(m_doc->identifierCount()); +// lang.prepend("translation "); +// } +// m_doc->appendIdentifier(lang); +// } +// } +// else +// { +// if (lang != m_doc->identifier(i) && !lang.isEmpty()) +// { +// // different language ? 
+// m_errorMessage = i18n("ambiguous definition of language code"); +// return false; +// } +// } + +// //--------- +// // Children + +// currentChild = currentElement.firstChildElement(KV_CONJUG_GRP); +// if (!currentChild.isNull()) { +// conjug.clear(); +// if (!readConjug(currentChild, conjug)) +// return false; +// } + +// currentChild = currentElement.firstChildElement(KV_COMPARISON_GRP); +// if (!currentChild.isNull()) { +// comparison.clear(); +// if (!readComparison(currentChild, comparison)) +// return false; +// } + +// currentChild = currentElement.firstChildElement(KV_MULTIPLECHOICE_GRP); +// if (!currentChild.isNull()) { +// mc.clear(); +// if (!readMultipleChoice(currentChild, mc)) +// return false; +// } + +// textstr = currentElement.lastChild().toText().data(); +// if (textstr.isNull()) +// textstr = ""; + +// if (i == 0) { +// expr = KEduVocExpression(textstr); +// expr.setLesson(lesson); +// expr.setInQuery(inquery); +// expr.setActive(active); +// } else { +// expr.setTranslation(i, textstr); +// } + +// if (conjug.size() > 0) { +// for ( int conjugationIndex = 0; conjugationIndex < conjug.size(); conjugationIndex++ ) { +// expr.translation(i).setConjugation(conjug[conjugationIndex]); +// } +// //expr.setConjugation(i, conjug[0]); ///@todo check if this is better than the above! 
+ +// conjug.clear(); +// } +// if (!comparison.isEmpty()) +// { +// expr.translation(i).setComparison(comparison); +// comparison.clear(); +// } +// if (!mc.isEmpty()) +// { +// expr.translation(i).setMultipleChoice(mc); +// mc.clear(); +// } +// if (!type.isEmpty() ) +// expr.translation(i).setType (type); +// if (!remark.isEmpty() ) +// expr.translation(i).setComment (remark); +// if (!pronunciation.isEmpty() ) +// expr.translation(i).setPronunciation(pronunciation); +// if (!faux_ami_f.isEmpty() ) +// expr.translation(i).setFalseFriend (0, faux_ami_f); +// if (!faux_ami_t.isEmpty() ) +// expr.translation(0).setFalseFriend (i, faux_ami_t); +// if (!synonym.isEmpty() ) +// expr.translation(i).setSynonym (synonym); +// if (!example.isEmpty() ) +// expr.translation(i).setExample (example); +// if (!usage.isEmpty() ) +// expr.translation(i).setUsageLabel (usage); +// if (!paraphrase.isEmpty() ) +// expr.translation(i).setParaphrase (paraphrase); +// if (!antonym.isEmpty() ) +// expr.translation(i).setAntonym (antonym); + +// if ( i != 0 ) { +// expr.translation(i).gradeFrom(0).setQueryCount(qcount); +// expr.translation(0).gradeFrom(i).setQueryCount(r_qcount); +// expr.translation(i).gradeFrom(0).setBadCount(bcount); +// expr.translation(0).gradeFrom(i).setBadCount(r_bcount); +// expr.translation(i).gradeFrom(0).setQueryDate(qdate); +// expr.translation(0).gradeFrom(i).setQueryDate(r_qdate); +// } +////kDebug() << "KEduVocKvtml2Reader::readExpression(): id: " << i << " translation: " << textstr << endl; + +// // Next translation +// currentElement = currentElement.nextSiblingElement(KV_TRANS); +// i++; +// } + + //if (m_doc->entryCount() == 0) + // m_doc->setQueryIdentifier(q_org, q_trans); + m_doc->appendEntry(&expr); + + return true; +} + +#include "keduvockvtml2reader.moc" diff --git a/kdeeducore/keduvockvtml2reader.h b/kdeeducore/keduvockvtml2reader.h new file mode 100644 index 0000000..255043d --- /dev/null +++ b/kdeeducore/keduvockvtml2reader.h @@ -0,0 +1,117 
@@
+/***************************************************************************
+                     read a KEduVocDocument from a KVTML2 file
+    -----------------------------------------------------------------------
+    copyright     : (C) 2007 Jeremy Whiting
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#ifndef KEDUVOCKVTML2READER_H
+#define KEDUVOCKVTML2READER_H
+
+#include <QtXml>
+#include <QList>
+
+#include "keduvocexpression.h"
+#include "keduvocgrammar.h"
+#include "keduvocmultiplechoice.h"
+
+class QIODevice;
+class KEduVocDocument;
+
+// internal types, indented are subtypes
+// NOTE(review): keduvockvtml2reader.cpp also includes keduvockvtmlreader.h; if that header defines these macros too, confirm the definitions are identical
+#define QM_VERB           "v"    // go
+#define   QM_VERB_IRR     "ir"
+#define   QM_VERB_REG     "re"
+#define QM_NOUN           "n"    // table, coffee
+#define   QM_NOUN_F       "f"
+#define   QM_NOUN_M       "m"
+#define   QM_NOUN_S       "s"
+#define QM_NAME           "nm"
+#define QM_ART            "ar"   // article
+#define   QM_ART_DEF      "def"  // definite    the
+#define   QM_ART_IND      "ind"  // indefinite  a/an
+#define QM_ADJ            "aj"   // adjective   expensive, good
+#define QM_ADV            "av"   // adverb      today, strongly
+#define QM_PRON           "pr"   // pronoun     you, she
+#define   QM_PRON_POS     "pos"  // possessive  my, your
+#define   QM_PRON_PER     "per"  // personal
+#define QM_PHRASE         "ph"
+#define QM_NUM            "num"  // numeral
+#define   QM_NUM_ORD      "ord"  // ordinal     first, second
+#define   QM_NUM_CARD     "crd"  // cardinal    one, two
+#define QM_INFORMAL       "ifm"
+#define QM_FIG            "fig"
+#define QM_CON            "con"  // conjunction and, but
+#define QM_PREP           "pre"  // preposition behind, between
+#define QM_QUEST          "qu"   // question    who, what
+
+// type delimiters
+
+#define QM_USER_TYPE  "#"   // designates number of user type
+#define QM_TYPE_DIV   ":"   // divide main from subtype
+
+// usage delimiters (also declared in UsageManager.h)
+
+#define UL_USER_USAGE  "#"   // designates number of user type
+
+/** Reads a KEduVocDocument from a KVTML2 file; documents whose version attribute is below 2.0 are read with the old kvtml format.
+@author Eric Pignet
+*/
+class KEduVocKvtml2Reader : public QObject
+{
+  Q_OBJECT
+public:
+  explicit KEduVocKvtml2Reader(QIODevice *file);  ///< @p file must already be open
+
+  bool readDoc(KEduVocDocument *doc);  ///< parses the device into @p doc; returns false on failure, see errorMessage()
+  // Per-element readers. NOTE(review): most bodies are outside the reviewed chunk; presumably false means failure, as in readDoc() - confirm.
+  bool readLesson(QDomElement &domElementParent);
+  bool readArticle(QDomElement &domElementParent);
+  bool readConjug(QDomElement &domElementParent, QList<KEduVocConjugation> &curr_conjug);
+  bool readOptions(QDomElement &domElementParent);
+  bool readType(QDomElement &domElementParent);
+  bool readTense(QDomElement &domElementParent);
+  bool readUsage(QDomElement &domElementParent);
+  bool readComparison(QDomElement &domElementParent, KEduVocComparison &comp);
+  bool readMultipleChoice(QDomElement &domElementParent, KEduVocMultipleChoice &mc);
+  bool readExpressionChildAttributes(QDomElement &domElementExpressionChild,
+                                     QString &lang,
+                                     grade_t &grade, grade_t &rev_grade,
+                                     int &count, int &rev_count,
+                                     QDateTime &date, QDateTime &rev_date,
+                                     QString &remark,
+                                     int &bcount, int &rev_bcount,
+                                     QString &query_id,
+                                     QString &pronunciation,
+                                     int &width,
+                                     QString &type,
+                                     QString &faux_ami_f,
+                                     QString &faux_ami_t,
+                                     QString &synonym,
+                                     QString &example,
+                                     QString &antonym,
+                                     QString &usage,
+                                     QString &paraphrase);
+  bool readExpression(QDomElement &expressionElement);  ///< reads one expression element and appends the entry to the document
+  bool readBody(QDomElement &domElementParent);
+
+  QString errorMessage() const {return m_errorMessage;}  ///< error text stored by a failed read; empty after construction
+
+private:
+  QIODevice *m_inputFile;     // input device handed to the constructor
+  KEduVocDocument *m_doc;     // target document, set by readDoc()
+  QString m_errorMessage;     // see errorMessage()
+  int m_cols;                 // reset to 0 by readDoc()
+  int m_lines;                // reset to 0 by readDoc()
+};
+
+#endif
-- 
2.47.3