Source code for hveto.core

# -*- coding: utf-8 -*-
# Copyright (C) Joshua Smith (2016-)
#
# This file is part of the hveto python package.
#
# hveto is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# hveto is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with hveto.  If not, see <http://www.gnu.org/licenses/>.

"""Core of the HierarchichalVeto algorithm
"""

import itertools
from math import (log, exp, log10)
from bisect import (bisect_left, bisect_right)

import numpy

from scipy.special import (gammainc, gammaln)

from astropy.table import vstack as vstack_tables

from gwpy.segments import (SegmentList, Segment)

__author__ = 'Duncan Macleod <duncan.macleod@ligo.org>'
__credits__ = 'Joshua Smith <joshua.smith@ligo.org>'

LOG_10 = log(10)
LOG_EXP_1 = log10(exp(1))


# -- define round structure --------------------------------------------------

[docs]class HvetoRound(object): __slots__ = ( 'n', 'primary', 'winner', 'segments', 'vetoes', 'use_percentage', 'efficiency', 'cum_efficiency', 'cum_deadtime', 'plots', 'files', 'scans', 'rank', ) def __init__(self, round, primary, segments=None, vetoes=None, plots=[], files={}, rank=None): self.n = round self.primary = primary self.segments = segments self.vetoes = vetoes self.plots = [] self.files = {} self.scans = None self.rank = rank @property def livetime(self): return float(abs(self.segments)) @property def deadtime(self): return (float(abs((self.vetoes & self.segments).coalesce())), float(abs(self.segments)))
# -- core methods ------------------------------------------------------------
[docs]def find_all_coincidences(triggers, channel, snrs, windows): """Find the number of coincs between each auxiliary channel and the primary Parameters ---------- primary : `numpy.ndarray` an array of times for the primary channel auxiliary : `numpy.recarray` an array of triggers for a set of auxiliary channels snrs : `list` of `float` the SNR thresholds to use window : `list` of `float` the time windows to use """ # FIXME need to work out having time column the same for each channel triggers.sort('time') windows = sorted(windows, reverse=True) snrs = sorted(snrs) coincs = dict((p, {}) for p in itertools.product(windows, snrs)) ntrig = len(triggers) for i, x in enumerate(triggers): if x['channel'] != channel: continue t = x['time'] channels = dict((key, set()) for key in coincs) j = i - 1 segs = [Segment(t - dt / 2., t + dt / 2.) for dt in windows] # define coincidence test def add_if_coinc(event): if event['channel'] == channel: return in_seg = filter(lambda s: s[0] <= event['time'] <= s[1], segs) if not in_seg: # no triggers in window return for k, w in enumerate(in_seg): for snr in filter(lambda s: event['snr'] >= s, snrs): channels[(windows[k], snr)].add(event['channel']) return 1 # search left half-window while j >= 0: if not add_if_coinc(triggers[j]): break j -= 1 j = i + 1 # search right half-window while j < ntrig: if not add_if_coinc(triggers[j]): break j += 1 # count 'em up for p, cset in channels.items(): for c in cset: try: coincs[p][c] += 1 except KeyError: coincs[p][c] = 1 return coincs
[docs]def find_max_significance(primary, auxiliary, channel, snrs, windows, livetime): """Find the maximum Hveto significance for this primary-auxiliary pair Parameters ---------- primary : `numpy.recarray` record array of data from the primary channel auxiliary : `numpy.recarray` record array from the auxiliary channel snrs : `list` of `float` the SNR thresholds to use window : `list` of `float` the time windows to use Returns ------- winner : `HvetoWinner` the parameters and segments generated by the (snr, dt) with the highest significance """ rec = vstack_tables([primary] + list(auxiliary.values())) coincs = find_all_coincidences(rec, channel, snrs, windows) winner = HvetoWinner(name='unknown', significance=-1) sigs = dict((c, 0) for c in auxiliary) for p, cdict in coincs.items(): dt, snr = p for chan in cdict: mu = (len(primary) * (auxiliary[chan]['snr'] >= snr).sum() * dt / livetime) # NOTE: coincs[p][chan] counts the number of primary channel # triggers coincident with a 'chan' trigger try: sig = significance(coincs[p][chan], mu) except KeyError: sig == 0 if sig > sigs[chan]: sigs[chan] = sig if sig > winner.significance: winner.name = chan winner.snr = snr winner.window = dt winner.significance = sig winner.mu = mu return winner, sigs
[docs]class HvetoWinner(object): __slots__ = ['name', 'significance', 'snr', 'window', 'segments', 'events', 'ncoinc', 'mu'] def __init__(self, name=None, significance=None, snr=None, window=None, segments=None, events=None, ncoinc=0, mu=None): super(HvetoWinner, self).__init__() self.name = name self.significance = significance self.snr = snr self.window = window self.segments = segments self.events = events self.mu = mu
[docs] def get_segments(self, times): return SegmentList([Segment(t - self.window / 2., t + self.window / 2.) for t in times])
[docs]def coinc_significance(a, b, dt, livetime): """Calculate the significance of coincidences between two time arrays Parameters ---------- a : `numpy.ndarray` first array b : `numpy.ndarray` second array dt : `float` coincidence window livetime : `float` the livetime of the analysis Returns ------- coincs : `numpy.ndarray` the indices of array `a` that were coincident with an entry in `b` significance : `float` the Poisson significance of the number of coincidences found as compared to the number expected by random chance """ # find coincidences coincs = find_coincidences(a, b, dt=dt) n = coincs.size if n == 0: return coincs, 0 # calculate significance try: prob = a.size * dt / livetime except ZeroDivisionError: prob = 0 mu = prob * b.size return coincs, significance(n, mu)
[docs]def significance(n, mu): """Calculate the significance of `n` coincidences, when `mu` were expected Parameters ---------- n : `int` the number of coincidences found mu : `float` the number of coincidences expected from a Poisson process """ g = gammainc(n, mu) if g == 0: sig = -n * log10(mu) + mu * LOG_EXP_1 + gammaln(n + 1) / LOG_10 else: sig = -log10(g) return float(sig)
[docs]def find_coincidences(a, b, dt=1): """Find the coincidences between values in two numpy arrays Parameters ---------- a : `numpy.ndarray` first array b : `numpy.ndarray` second array dt : `float`, optional coincidence window Returns ------- coinc : `numpy.ndarray` the indices of all items in `a` within [-dt/2., +dt/2.) of an item in `b` """ dx = dt / 2. def _is_coincident(t): x = bisect_left(b, t - dx) # find b >= t-dx y = bisect_right(b, t + dx) # find b <= t+dx if x != y: return True return False out = numpy.zeros(a.size) for i, t in enumerate(a): out[i] = _is_coincident(t) return out.nonzero()[0]
[docs]def veto(table, segmentlist): """Remove events from a table based on a segmentlist A time ``t`` will be vetoed if ``start <= t <= end`` for any veto segment in the list. Parameters ---------- table : `numpy.recarray` the table of event triggers to veto segmentlist : `~ligo.segments.segmentlist` the list of veto segments to use Returns ------- keep : `numpy.recarray` the reduced table of events that were not coincident with any segments """ table.sort('time') times = table['time'] segmentlist = type(segmentlist)(segmentlist).coalesce() keep = numpy.ones(times.shape[0], dtype=bool) j = 0 a, b = segmentlist[j] i = 0 while i < times.size: t = times[i] # if before start, move to next trigger now if t < a: i += 1 continue # if after end, find the next segment and check this trigger again if t > b: j += 1 try: a, b = segmentlist[j] continue except IndexError: break # otherwise it must be in this segment, record and move to next keep[i] = False i += 1 return table[keep], table[~keep]
[docs]def veto_all(auxiliary, segmentlist): """Remove events from all auxiliary channel tables based on a segmentlist Parameters ---------- auxiliary : `dict` of `numpy.recarray` a `dict` of event arrays to veto segmentlist : `~ligo.segments.segmentlist` the list of veto segments to use Returns ------- survivors : `dict` of `numpy.recarray` a dict of the reduced arrays of events for each input channel See Also -------- core.veto for details on the veto algorithm itself """ channels = auxiliary.keys() t = vstack_tables(list(auxiliary.values())) keep, _ = veto(t, segmentlist) return dict((c, keep[keep['channel'] == c]) for c in channels)