Libextractor - GNU Project

January 31st · info libextractor

extract
file
$ svn checkout https://gnunet.org/svn/Extractor
$ svn checkout https://gnunet.org/svn/Extractor-java
$ svn checkout https://gnunet.org/svn/Extractor-mono
$ svn checkout https://gnunet.org/svn/Extractor-python
$ git clone git://git.perldition.org/File-Extractor.git/
$ svn checkout https://gnunet.org/svn/Extractor-php
extract
libextractor-dev
# apt-get install libextractor-dev extract

$ tar xzvf libextractor.x.y.z.tar.gz
$ cd libextractor.x.y.z
$ ./configure
$ make
# make install

README
-b
man 1 extract
$ extract libextractor-0.1.3-1.src.rpm
Keywords for file libextractor-0.1.3-1.src.rpm:
os - linux
resource-identifier - http://ovmj.org/libextractor/
group - System Environment/Libraries
license - LGPL
copyright - LGPL
size - 251545
build-host - wedge.cs.purdue.edu
creation date - Wed Dec 25 07:50:07 2002
description - libextractor is a simple library...
summary - keyword extraction library
release - 1
version - 0.1.3
title - libextractor
unknown - SOURCE RPM 3.0
mimetype - application/x-rpm

$ extract extractor_logo.png
Keywords for file extractor_logo.png:
image dimensions - 272x188
thumbnail - (binary, 5932 bytes)
image dimensions - 272x188
thumbnail - (binary, 6427 bytes)
image dimensions - 272x188
thumbnail - (binary, 6427 bytes)
mimetype - image/png
mimetype - image/png
image dimensions - 272x188
keywords - The libextractor logo

-lextractor
man 3 libextractor
import Extractor
#include <extractor.h>

/**
 * Minimal libextractor client.
 *
 * Loads the default plugin set, runs the extraction on the file named by
 * the first command-line argument with EXTRACTOR_meta_data_print as the
 * metadata callback and stdout as its argument, then unloads the plugins.
 *
 * @param argc number of command-line arguments
 * @param argv argument vector; argv[1] names the file to inspect
 * @return 0 once the extraction has been run, 1 if no file name was given
 */
int
main (int argc, char * argv[])
{
  struct EXTRACTOR_PluginList *plugins;

  /* Without an argument argv[1] is NULL; since no in-memory buffer is
     passed either (NULL, 0 below), the library would have nothing to
     work on.  Bail out early with a usage hint instead. */
  if (argc < 2)
    {
      fprintf (stderr, "Usage: %s FILENAME\n",
               (argc > 0 && NULL != argv[0]) ? argv[0] : "extract");
      return 1;
    }

  plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
  EXTRACTOR_extract (plugins, argv[1],
                     NULL, 0,
                     &EXTRACTOR_meta_data_print, stdout);
  EXTRACTOR_plugin_remove_all (plugins);
  return 0;
}

libextractor_XXX.so
$PREFIX/lib/libextractor/
EXTRACTOR_XXX_extract_method
/* Prototype of a plugin's extract method.  "XXX" is a placeholder,
   presumably for the plugin's own name (compare libextractor_XXX.so).
   Takes a pointer to the extraction context `ec` and returns nothing. */
void
EXTRACTOR_XXX_extract_method (struct EXTRACTOR_ExtractContext *ec);

ec
extract
ec->proc
ec->cls
proc
ec
ec->config
config
format
proc
text/plain
/* Function-pointer type for a metadata processor callback.
   Arguments: an untyped pointer `cls` (presumably caller-supplied context,
   verify against extractor.h), the name of the plugin, the meta type and
   meta format enums, a MIME type string for the data, and the data
   pointer with its size `data_len`.
   Returns an int; its meaning is not shown here -- consult the
   libextractor manual before relying on it. */
typedef int (*EXTRACTOR_MetaDataProcessor)(void *cls,
                                           const char *plugin_name,
                                           enum EXTRACTOR_MetaType type,
                                           enum EXTRACTOR_MetaFormat format,
                                           const char *data_mime_type,
                                           const char *data,
                                           size_t data_len);

GNU Libextractor

Downloading Libextractor

Documentation

Mailing lists

Getting involved

Quick Introduction

Licensing