Advanced Topics

LDAP

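The BioMAJ watcher provides an optional web interface to manage banks. Users can create “private” banks and manage them via the web. The LDAP settings (use_ldap, ldap.host, ldap.port, ldap.dn) are set in global.properties, as shown in the example further down.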

ElasticSearch

In order to use the --search flag, you may wish to connect to an ElasticSearch cluster.

You will need to edit your global.properties to indicate where the ES servers are:

use_elastic=0
#Comma-separated list of elasticsearch nodes, e.g. host1,host2:port2
elastic_nodes=localhost
elastic_index=biomaj
# Calculate data.dir size stats
data.stats=1

An example docker-compose.yml would use this:

version: '2'
services:
    biomaj:
        image: osallou/biomaj-docker
        links:
            - mongodb:biomaj-mongodb
            - elasticsearch
        volumes:
            - ./data:/var/lib/biomaj
            - ./global.advanced.properties:/etc/biomaj/global.properties

    mongodb:
        image: mongo

    elasticsearch:
        image: elasticsearch:1.7

And a modified global.properties referenced in that file would enable elasticsearch:

[GENERAL]
root.dir=/var/lib/biomaj
conf.dir=%(root.dir)s/conf
log.dir=%(root.dir)s/log
process.dir=%(root.dir)s/process
cache.dir=%(root.dir)s/cache
lock.dir=%(root.dir)s/lock
#The root directory where all databases are stored.
#If your data is not stored under one directory hierarchy
#you can override this value in the database properties file.
data.dir=%(root.dir)s/data

db.url=mongodb://biomaj-mongodb:27017
db.name=biomaj

use_ldap=0
ldap.host=localhost
ldap.port=389
ldap.dn=nodomain

use_elastic=1
#Comma-separated list of elasticsearch nodes, e.g. host1,host2:port2
elastic_nodes=elasticsearch
elastic_index=biomaj
# Calculate data.dir size stats
data.stats=1

celery.queue=biomaj
celery.broker=mongodb://biomaj-mongodb:27017/biomaj_celery


auto_publish=1

########################
# Global properties file


#To override these settings for a specific database go to its
#properties file and uncomment or add the specific line you want
#to override.

#----------------
# Mail Configuration
#---------------
#Uncomment these lines if you want to receive mail when the workflow is finished

mail.smtp.host=
#mail.smtp.port=25
mail.admin=
mail.from=biomaj@localhost
mail.user=
mail.password=
mail.tls=
# tail last X bytes of log in mail body, 0 = no tail
# mail.body.tail=2000000
# attach log file if size < X bytes, 0 for no attach
#mail.body.attach=4000000
# path to jinja template for subject, leave empty for defaults
#mail.template.subject=
# path to jinja template for body, leave empty for default
#mail.template.body=

#---------------------
#Proxy authentication
#---------------------
#proxyHost=
#proxyPort=
#proxyUser=
#proxyPassword=

#---------------------
# PROTOCOL
#-------------------
#possible protocol values: ftp, http, rsync, local
port=21
username=anonymous
password=anonymous@nowhere.com



#Access permissions (chmod mode) for production directories
production.directory.chmod=775

#Number of threads used during the download
bank.num.threads=4

#Number of threads to use for downloading and processing
files.num.threads=4

#to keep more than one release increase this value
keep.old.version=0

#Link copy property
do.link.copy=true


#The historic log file is generated in log/
#define level information for output : DEBUG,INFO,WARN,ERR
historic.logfile.level=INFO

http.parse.dir.line=<a[\\s]+href=\"([\\S]+)/\".*alt=\"\\[DIR\\]\">.*([\\d]{2}-[\\w\\d]{2,5}-[\\d]{4}\\s[\\d]{2}:[\\d]{2})
http.parse.file.line=<a[\\s]+href=\"([\\S]+)\".*([\\d]{2}-[\\w\\d]{2,5}-[\\d]{4}\\s[\\d]{2}:[\\d]{2})[\\s]+([\\d\\.]+[MKG]{0,1})

http.group.dir.name=1
http.group.dir.date=2
http.group.file.name=1
http.group.file.date=2
http.group.file.size=3

#Needed if data sources are contained in an archive
log.files=true

local.files.excluded=\\.panfs.*

#~40mn
ftp.timeout=2000000
ftp.automatic.reconnect=5
ftp.active.mode=false

# Bank default access
visibility.default=public

#proxy=http://localhost:3128

[loggers]
keys = root, biomaj

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = INFO
handlers = console

[logger_biomaj]
level = INFO
handlers = console
qualname = biomaj
propagate=0

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = DEBUG
formatter = generic

[formatter_generic]
format = %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s