Очень впечатляющий ответ! Спасибо!

Question

Jan 19, 2017, 06:58 PM

parsing boost boost-spirit c++boost-spirit-qi

Очень впечатляющий ответ! Спасибо!

дал парсер Qi для пользовательского формата текстового файла. Есть десятки тысяч записей для обработки, и каждая запись обычно имеет от 1 до 10 подстатей. Я положил урезанный рабочий пример моего парсераВот.

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>

#include <fstream>
#include <iostream>
#include <string>

using std::string;
using std::vector;
using std::cout;
using std::endl;

namespace model
{
    namespace qi = boost::spirit::qi;

    struct spectrum
    {
        string comment;
        string file;
        string nativeId;
        double precursorMz;
        int precursorCharge;
        double precursorIntensity;
    };

    struct cluster
    {
        string id;
        vector<spectrum> spectra;
    };

    struct clustering
    {
        string name;
        vector<cluster> clusters;
    };
}

// Tell fusion about the data structures to make them first-class fusion citizens.
// Must be at global scope.

BOOST_FUSION_ADAPT_STRUCT(
    model::spectrum,
    (string, comment)
    (string, file)
    (string, nativeId)
    (double, precursorMz)
    (int, precursorCharge)
    (double, precursorIntensity)
)

BOOST_FUSION_ADAPT_STRUCT(
    model::cluster,
    (string, id)
    (std::vector<model::spectrum>, spectra)
)

BOOST_FUSION_ADAPT_STRUCT(
    model::clustering,
    (string, name)
    (std::vector<model::cluster>, clusters)
)

namespace {
    struct ReportError
    {
      template<typename, typename, typename, typename> struct result { typedef void type; };

      // contract the string to the surrounding new-line characters
      template<typename Iter>
      void operator()(Iter first_iter, Iter last_iter,
                      Iter error_iter, const boost::spirit::qi::info& what) const
      {
        std::string first(first_iter, error_iter);
        std::string last(error_iter, last_iter);
        auto first_pos = first.rfind('\n');
        auto last_pos = last.find('\n');
        auto error_line = ((first_pos == std::string::npos) ? first
                            : std::string(first, first_pos + 1))
                          + std::string(last, 0, last_pos);
        //auto error_pos = (error_iter - first_iter) + 1;
        /*auto error_pos = error
        if (first_pos != std::string::npos)
          error_pos -= (first_pos + 1);*/

        std::cerr << "Error parsing in " << what << std::endl
                  << error_line << std::endl
                  //<< std::setw(error_pos) << '^'
                  << std::endl;
        }
    };

    const boost::phoenix::function<ReportError> report_error = ReportError();
}

namespace model
{
    template <typename Iterator>
    struct cluster_parser : qi::grammar<Iterator, clustering(), qi::blank_type>
    {
        cluster_parser() : cluster_parser::base_type(clusters)
        {
            using qi::int_;
            using qi::lit;
            using qi::double_;
            using qi::bool_;
            using qi::lexeme;
            using qi::eol;
            using qi::ascii::char_;
            using qi::on_error;
            using qi::fail;
            using namespace qi::labels;
            using boost::phoenix::construct;
            using boost::phoenix::val;

            quoted_string %= lexeme['"' > +(char_ - '"') > '"'];

            spectrum_start %=
                lit("SPEC") >
                "#" > +(char_ - "File:") >
                "File:" > quoted_string > lit(",") >
                "NativeID:" > quoted_string >
                bool_ > double_ > int_ > double_;

            cluster_start %= 
                "=Cluster=" > eol >
                "id=" > +(char_ - eol) > eol >
                spectrum_start % eol;

            clusters %= 
                "name=" > +(char_ - eol) > eol >
                eol >
                cluster_start % eol;

            BOOST_SPIRIT_DEBUG_NODES((clusters)(cluster_start)(quoted_string)(spectrum_start))

            //on_error<fail>(clusters, report_error(_1, _2, _3, _4));
            //on_error<fail>(cluster_start, report_error(_1, _2, _3, _4));
            //on_error<fail>(spectrum_start, report_error(_1, _2, _3, _4));
            //on_error<fail>(quoted_string, report_error(_1, _2, _3, _4));

            // on_success(cluster_start, quantify_cluster(_1, _2, _3, _4)); ??
        }

        qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
        qi::rule<Iterator, cluster(), qi::blank_type> cluster_start;
        qi::rule<Iterator, spectrum(), qi::blank_type> spectrum_start;
        qi::rule<Iterator, clustering(), qi::blank_type> clusters;
    };
}

int main()
{
    using namespace model;

    cluster_parser<boost::spirit::istream_iterator> g; // Our grammar
    string str;
    //std::ifstream input("c:/test/Mo_tai.clustering");
    std::istringstream input("name=GreedyClustering_0.99\n"
"\n"
"=Cluster=\n"
"id=9c8c5830-5841-4f77-b819-64180509615b\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f4.mgf#id=index=219#title=Mo_Tai_iTRAQ_f4.1254.1254.2 File:\"Mo_Tai_iTRAQ_f4.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=1254\"\ttrue\t\t300.1374\t2\t\t\t0.0\n"
"=Cluster=\n"
"id=f8f384a1-3d5f-4af1-9581-4d03a5aa3342\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f9.mgf#id=index=560#title=Mo_Tai_iTRAQ_f9.1666.1666.3 File:\"Mo_Tai_iTRAQ_f9.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=1666\"\ttrue\t\t300.14413\t3\t\t\t0.0\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f9.mgf#id=index=520#title=Mo_Tai_iTRAQ_f9.1621.1621.3 File:\"Mo_Tai_iTRAQ_f9.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=1621\"\ttrue\t\t300.14197\t3\t\t\t0.0\n"
"=Cluster=\n"
"id=b84b79e1-44bc-44c0-a9af-5391ca02582d\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f2.mgf#id=index=7171#title=Mo_Tai_iTRAQ_f2.12729.12729.2 File:\"Mo_Tai_iTRAQ_f2.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=12729\"\ttrue\t\t300.15695\t2\t\t\t0.0");
    input.unsetf(std::ios::skipws);
    boost::spirit::istream_iterator begin(input);
    boost::spirit::istream_iterator end;

    clustering clusteringResults;
    bool r = phrase_parse(begin, end, g, qi::blank, clusteringResults);

    if (r && begin == end)
    {
        cout << "Parsing succeeded (" << clusteringResults.clusters.size() << " clusters)\n";

        /*for (size_t i = 0; i < std::min((size_t)10, clusteringResults.clusters.size()); ++i)
        {
            cluster& c = clusteringResults.clusters[i];
            cout << "Cluster " << c.id << " - avg. precursor m/z: " << c.avgPrecursorMz << ", num. spectra: " << c.spectra.size() << endl;
        }*/
        return 1;
    }
    else
    {
        std::cout << "Parsing failed (" << clusteringResults.clusters.size() << " clusters)\n";

        if (!clusteringResults.clusters.empty())
        {
            cluster& c = clusteringResults.clusters.back();
            cout << "Last cluster parsed " << c.id << ", num. spectra: " << c.spectra.size() << endl;
        }
        return 1;
    }
}

Я не хочу анализировать весь файл в памяти перед его обработкой. Как я могу поставить в очередь запись (кластер) для обработки после того, как каждый кластер завершит синтаксический анализ, удалить кластер после обработки и продолжить анализ? Еще лучше было бы иметь другой поток, обрабатывающий асинхронно.

Очень впечатляющий ответ! Спасибо!

Ответы на вопрос(1)

Ваш ответ на вопрос

Популярные вопросы

Вы очень активны! Это здорово!

Очень впечатляющий ответ! Спасибо!

Ответы на вопрос(1)

Ваш ответ на вопрос

Популярные вопросы