I prepared a new toy to play with at Debconf and uploaded it to unstable:
Package: libept-dev
Description: High-level library for managing Debian package information
The library defines a very minimal framework in which many sources of data
about Debian packages can be implemented and queried together.
.
The library includes four data sources:
.
* APT: access the APT database
* Debtags: access the Debtags tag information
* Popcon: access Popcon package scores
* TextSearch: fast Xapian-based full text search on package descriptions
.
This is the development library.
Package: ept-cache
Description: Commandline tool to search the package archive
ept-cache is a simple commandline interface to the functions of libept.
.
It can currently search and display data from four sources:
.
* The APT database
* The Debtags tag information
* Popcon package scores
* A fast Xapian-based full text index on package descriptions
Yes, this finally brings lots of very cool data sources about packages together.
Try this one:
# Check if all data providers are active and give instructions on how
# to activate those that aren't
ept-cache info
# Follow the instructions to activate everything
# Show all GUI image editors, sorted by popularity, in reverse order
ept-cache search image editor -t gui -s p-
If you have the Xapian data provider enabled, the results of a search are given in relevance order, the most relevant first. Searches are also done with proper stemming, so if you look for "image editor" it will also find "image editing", although it will score "image editor" higher.
It's also quite lovely to work with it in C++. I'll improvise a few examples here:
Print name and short description of every package
#include <ept/apt/apt.h>
#include <ept/apt/packagerecord.h>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using namespace ept::apt;
/**
 * Print the name and short description of every package.
 *
 * Walks every raw record exposed by the Apt data source, parses each one
 * with a PackageRecord and writes one "name - short description" line per
 * record to standard output.
 */
void playWithApt()
{
    // Apt data source
    Apt apt;

    // Parser of package records, declared once and refilled by scan()
    PackageRecord rec;

    // Iterate all package records
    for (Apt::record_iterator i = apt.recordBegin();
         i != apt.recordEnd(); ++i)
    {
        rec.scan(*i);
        // The accessor is package(); the original snippet misspelled it
        cout << rec.package() << " - " << rec.shortDescription() << endl;
    }
}
Show all image editors
#include <ept/debtags/debtags.h>
#include <set>
using namespace ept::debtags;
/**
 * Show all image editors.
 *
 * Builds a set of three Debtags tags (raster images, editing, program),
 * asks the Debtags data source for the items having those tags, then looks
 * up each result in APT and prints "name - short description" for it.
 *
 * Returns without printing anything when the Debtags data source reports
 * that it has no data.
 */
void playWithDebtags()
{
    // Apt data source
    Apt apt;

    // Parser of package records
    PackageRecord rec;

    // Debtags data source
    Debtags debtags;

    // Nothing to query if the tag database is not available
    if (!debtags.hasData())
        return;

    // Tags that describe what we are looking for
    set<Tag> tags;
    tags.insert(debtags.vocabulary().tagByName("works-with::image:raster"));
    tags.insert(debtags.vocabulary().tagByName("use::editing"));
    tags.insert(debtags.vocabulary().tagByName("role::program"));

    // NOTE(review): presumably the packages carrying all of the given tags
    // -- confirm against the Debtags API
    set<string> results = debtags.getItemsHavingTags(tags);

    for (set<string>::const_iterator i = results.begin();
         i != results.end(); ++i)
    {
        // Fetch the raw APT record of the package by name and parse it
        rec.scan(apt.rawRecord(*i));
        // The accessor is package(); the original snippet misspelled it
        cout << rec.package() << " - " << rec.shortDescription() << endl;
    }
}
Print all package names, sorted by popularity
#include <ept/popcon/popcon.h>
#include <algorithm>
using namespace ept::popcon;
// STL comparator
struct PopconCompare
{
Popcon& popcon;
bool operator<(const std::string& pkg1, const std::string& pkg2) const
{
return popcon[pkg1] < popocon[pkg2];
}
};
void playWithPopcon()
{
// Apt data source
Apt apt;
// Popcon data source
Popcon popcon;
vector<string> sorted;
if (!popcon.hasData())
return;
// Get all package names in the vector
copy(apt.begin(), apt.end(), back_inserter(sorted));
// Sort it by popularity
sort(sorted.begin(), sorted.end(), PopconCompare(popcon));
// Print it out
for (vector<string>::const_iterator i = sorted.begin();
i != sorted.end(); ++i)
cout << *i << endl;
}
Search for image viewer, but we don't want to view kernel images
#include <xapian.h>
using namespace ept::textsearch;
/**
 * Search for "image viewer" while excluding "linux kernel" matches.
 *
 * Builds two OR queries through the TextSearch data source, combines them
 * with OP_AND_NOT, runs the result against the TextSearch Xapian database
 * and prints the first 20 matches with their relevance percentage.
 *
 * NOTE(review): TextSearch is presumably declared in
 * <ept/textsearch/textsearch.h>, which this snippet does not include --
 * confirm and add the include if needed.
 */
void playWithXapian()
{
    // Full text search data source
    TextSearch textsearch;

    Xapian::Enquire enq(textsearch.db());

    // This will tokenise the search query into terms, stem them
    // and OR them together in a query. Xapian will score higher
    // those results in which more ORed terms match, which is what
    // we want.
    // (The original snippet referred to an undeclared `textSearch`.)
    Xapian::Query want = textsearch.makeOrQuery("image viewer");
    Xapian::Query dontWant = textsearch.makeOrQuery("linux kernel");

    // Keep what matches `want` but drop anything that matches `dontWant`
    enq.set_query(Xapian::Query(Xapian::Query::OP_AND_NOT, want, dontWant));

    // Print the top 20 results, with their relevance percentage
    Xapian::MSet matches = enq.get_mset(0, 20);
    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i)
    {
        // The get_data() of a document is the package name
        cout << i.get_document().get_data() << " ("
             << i.get_percent() << "%)" << endl;
    }
}