Source code for hveto.core

# -*- coding: utf-8 -*-
# Copyright (C) Joshua Smith (2016-)
#
# This file is part of the hveto python package.
#
# hveto is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# hveto is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with hveto.  If not, see <http://www.gnu.org/licenses/>.

"""Core of the HierarchichalVeto algorithm
"""

import itertools
from math import (log, exp, log10)
from bisect import (bisect_left, bisect_right)

import numpy

from scipy.special import (gammainc, gammaln)

from astropy.table import vstack as vstack_tables

from gwpy.segments import (SegmentList, Segment)

__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'
__credits__ = 'Joshua Smith <joshua.smith@ligo.org>'

LOG_10 = log(10)
LOG_EXP_1 = log10(exp(1))


# -- define round structure --------------------------------------------------


[docs]
class HvetoRound(object):
    __slots__ = (
        'n',
        'primary',
        'winner',
        'segments',
        'vetoes',
        'use_percentage',
        'efficiency',
        'cum_efficiency',
        'cum_deadtime',
        'plots',
        'files',
        'scans',
        'rank',
    )

    def __init__(self, round, primary, segments=None, vetoes=None,
                 plots=[], files={}, rank=None):
        self.n = round
        self.primary = primary
        self.segments = segments
        self.vetoes = vetoes
        self.plots = []
        self.files = {}
        self.scans = None
        self.rank = rank

    @property
    def livetime(self):
        return float(abs(self.segments))

    @property
    def deadtime(self):
        return (float(abs((self.vetoes & self.segments).coalesce())),
                float(abs(self.segments)))



# -- core methods ------------------------------------------------------------


[docs]
def find_all_coincidences(triggers, channel, snrs, windows):
    """Find the number of coincs between each auxiliary channel and the primary

    Parameters
    ----------
    primary : `numpy.ndarray`
        an array of times for the primary channel
    auxiliary : `numpy.recarray`
        an array of triggers for a set of auxiliary channels
    snrs : `list` of `float`
        the SNR thresholds to use
    window : `list` of `float`
        the time windows to use
    """
    # FIXME need to work out having time column the same for each channel
    triggers.sort('time')
    windows = sorted(windows, reverse=True)
    snrs = sorted(snrs)
    coincs = dict((p, {}) for p in itertools.product(windows, snrs))
    ntrig = len(triggers)

    for i, x in enumerate(triggers):
        if x['channel'] != channel:
            continue
        t = x['time']
        channels = dict((key, set()) for key in coincs)
        j = i - 1
        segs = [Segment(t - dt / 2., t + dt / 2.) for dt in windows]

        # define coincidence test
        def add_if_coinc(event):
            if event['channel'] == channel:
                return
            in_seg = filter(lambda s: s[0] <= event['time'] <= s[1], segs)
            if not in_seg:  # no triggers in window
                return
            for k, w in enumerate(in_seg):
                for snr in filter(lambda s: event['snr'] >= s, snrs):
                    channels[(windows[k], snr)].add(event['channel'])
            return 1

        # search left half-window
        while j >= 0:
            if not add_if_coinc(triggers[j]):
                break
            j -= 1
        j = i + 1
        # search right half-window
        while j < ntrig:
            if not add_if_coinc(triggers[j]):
                break
            j += 1

        # count 'em up
        for p, cset in channels.items():
            for c in cset:
                try:
                    coincs[p][c] += 1
                except KeyError:
                    coincs[p][c] = 1

    return coincs




[docs]
def find_max_significance(primary, auxiliary, channel, snrs, windows,
                          livetime):
    """Find the maximum Hveto significance for this primary-auxiliary pair

    Parameters
    ----------
    primary : `numpy.recarray`
        record array of data from the primary channel
    auxiliary : `numpy.recarray`
        record array from the auxiliary channel
    snrs : `list` of `float`
        the SNR thresholds to use
    window : `list` of `float`
        the time windows to use

    Returns
    -------
    winner : `HvetoWinner`
        the parameters and segments generated by the (snr, dt) with the
        highest significance
    """
    rec = vstack_tables([primary] + list(auxiliary.values()))
    coincs = find_all_coincidences(rec, channel, snrs, windows)
    winner = HvetoWinner(name='unknown', significance=-1)
    sigs = dict((c, 0) for c in auxiliary)
    for p, cdict in coincs.items():
        dt, snr = p
        for chan in cdict:
            mu = (len(primary) * (auxiliary[chan]['snr'] >= snr).sum() * dt / livetime)
            # NOTE: coincs[p][chan] counts the number of primary channel
            # triggers coincident with a 'chan' trigger
            try:
                sig = significance(coincs[p][chan], mu)
            except KeyError:
                sig == 0
            if sig > sigs[chan]:
                sigs[chan] = sig
            if sig > winner.significance:
                winner.name = chan
                winner.snr = snr
                winner.window = dt
                winner.significance = sig
                winner.mu = mu
    return winner, sigs




[docs]
class HvetoWinner(object):
    __slots__ = ['name', 'significance', 'snr', 'window', 'segments',
                 'events', 'ncoinc', 'mu']

    def __init__(self, name=None, significance=None, snr=None,
                 window=None, segments=None, events=None, ncoinc=0, mu=None):
        super(HvetoWinner, self).__init__()
        self.name = name
        self.significance = significance
        self.snr = snr
        self.window = window
        self.segments = segments
        self.events = events
        self.mu = mu


[docs]
    def get_segments(self, times):
        return SegmentList([Segment(t - self.window / 2., t + self.window / 2.)
                            for t in times])





[docs]
def coinc_significance(a, b, dt, livetime):
    """Calculate the significance of coincidences between two time arrays

    Parameters
    ----------
    a : `numpy.ndarray`
        first array
    b : `numpy.ndarray`
        second array
    dt : `float`
        coincidence window
    livetime : `float`
        the livetime of the analysis

    Returns
    -------
    coincs : `numpy.ndarray`
        the indices of array `a` that were coincident with an entry in `b`
    significance : `float`
        the Poisson significance of the number of coincidences found as
        compared to the number expected by random chance
    """
    # find coincidences
    coincs = find_coincidences(a, b, dt=dt)
    n = coincs.size
    if n == 0:
        return coincs, 0
    # calculate significance
    try:
        prob = a.size * dt / livetime
    except ZeroDivisionError:
        prob = 0
    mu = prob * b.size
    return coincs, significance(n, mu)




[docs]
def significance(n, mu):
    """Calculate the significance of `n` coincidences, when `mu` were expected

    Parameters
    ----------
    n : `int`
        the number of coincidences found
    mu : `float`
        the number of coincidences expected from a Poisson process
    """
    g = gammainc(n, mu)
    if g == 0:
        sig = -n * log10(mu) + mu * LOG_EXP_1 + gammaln(n + 1) / LOG_10
    else:
        sig = -log10(g)
    return float(sig)




[docs]
def find_coincidences(a, b, dt=1):
    """Find the coincidences between values in two numpy arrays

    Parameters
    ----------
    a : `numpy.ndarray`
        first array
    b : `numpy.ndarray`
        second array
    dt : `float`, optional
        coincidence window

    Returns
    -------
    coinc : `numpy.ndarray`
        the indices of all items in `a` within [-dt/2., +dt/2.) of an item
        in `b`
    """
    dx = dt / 2.

    def _is_coincident(t):
        x = bisect_left(b, t - dx)  # find b >= t-dx
        y = bisect_right(b, t + dx)  # find b <= t+dx
        if x != y:
            return True
        return False

    out = numpy.zeros(a.size)
    for i, t in enumerate(a):
        out[i] = _is_coincident(t)
    return out.nonzero()[0]




[docs]
def veto(table, segmentlist):
    """Remove events from a table based on a segmentlist

    A time ``t`` will be vetoed if ``start <= t <= end`` for any veto
    segment in the list.

    Parameters
    ----------
    table : `numpy.recarray`
        the table of event triggers to veto
    segmentlist : `~ligo.segments.segmentlist`
        the list of veto segments to use

    Returns
    -------
    keep : `numpy.recarray`
        the reduced table of events that were not coincident with any
        segments
    """
    table.sort('time')
    times = table['time']
    segmentlist = type(segmentlist)(segmentlist).coalesce()
    keep = numpy.ones(times.shape[0], dtype=bool)
    j = 0
    a, b = segmentlist[j]
    i = 0
    while i < times.size:
        t = times[i]
        # if before start, move to next trigger now
        if t < a:
            i += 1
            continue
        # if after end, find the next segment and check this trigger again
        if t > b:
            j += 1
            try:
                a, b = segmentlist[j]
                continue
            except IndexError:
                break
        # otherwise it must be in this segment, record and move to next
        keep[i] = False
        i += 1
    return table[keep], table[~keep]




[docs]
def veto_all(auxiliary, segmentlist):
    """Remove events from all auxiliary channel tables based on a segmentlist

    Parameters
    ----------
    auxiliary : `dict` of `numpy.recarray`
        a `dict` of event arrays to veto
    segmentlist : `~ligo.segments.segmentlist`
        the list of veto segments to use

    Returns
    -------
    survivors : `dict` of `numpy.recarray`
        a dict of the reduced arrays of events for each input channel

    See Also
    --------
    core.veto
        for details on the veto algorithm itself
    """
    channels = auxiliary.keys()
    t = vstack_tables(list(auxiliary.values()))
    keep, _ = veto(t, segmentlist)
    return dict((c, keep[keep['channel'] == c]) for c in channels)