DefaultAssociator.java

/*
 * DefaultAssociator
 */
package gov.usgs.earthquake.indexer;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Utilities for associating events.
 *
 * Based on the QDM EQEventsUtils class.
 */
public class DefaultAssociator implements Associator {

  private static final Logger LOGGER = Logger.getLogger(DefaultAssociator.class.getName());

  // time
  /** Distance between related events in time, in milliseconds. */
  public static final long TIME_DIFF_MILLISECONDS = 16 * 1000;

  // space
  /** Distance between related events in space, in kilometers. */
  public static final BigDecimal LOCATION_DIFF_KILOMETER = new BigDecimal(100);

  /** Number of kilometers in a degree at the equator. */
  public static final BigDecimal KILOMETERS_PER_DEGREE = new BigDecimal("111.12");

  /**
   * Distance between related events latitude, in degrees.
   *
   * This is based on the max number of kilometers per degree, and provides the
   * maximum latitude separation (assuming events share a longitude).
   *
   * Used as a pre-filter before more expensive checks.
   */
  public static final BigDecimal LOCATION_DIFF_DEGREES = new BigDecimal(
      LOCATION_DIFF_KILOMETER.doubleValue() / KILOMETERS_PER_DEGREE.doubleValue());

  /**
   * Build an index search that searches for associated products. Products are
   * considered associated if the eventid matches or their location is within a
   * certain distance.
   */
  public SearchRequest getSearchRequest(ProductSummary summary) {
    SearchRequest request = new SearchRequest();

    // Order is important here. The eventId query must be added first
    ProductIndexQuery eventIdQuery = getEventIdQuery(summary.getEventSource(), summary.getEventSourceCode());
    if (eventIdQuery != null) {
      request.addQuery(new EventDetailQuery(eventIdQuery));
    }

    // Now a query that looks for location
    ProductIndexQuery locationQuery = getLocationQuery(summary.getEventTime(), summary.getEventLatitude(),
        summary.getEventLongitude());
    if (locationQuery != null) {
      request.addQuery(new EventDetailQuery(locationQuery));
    }

    return request;
  }

  /**
   * Choose and return the most closely associated event.
   *
   * @param events  a list of candidate events.
   * @param summary the summary being associated.
   * @return the best match event from the list of events.
   */
  public Event chooseEvent(final List<Event> events, final ProductSummary summary) {
    List<Event> filteredEvents = new LinkedList<Event>();

    // remove events that are from the same source with a different code
    String summarySource = summary.getEventSource();
    String summaryCode = summary.getEventSourceCode();
    if (summarySource == null || summaryCode == null) {
      // can't check if same source with different code
      filteredEvents = events;
    } else {
      // try to associate by event id
      Iterator<Event> iter = events.iterator();
      while (iter.hasNext()) {
        Event event = iter.next();

        boolean sameSourceDifferentCode = false;
        Iterator<ProductSummary> summaryIter;

        if (event.isDeleted()) {
          // ignore delete products before checking
          summaryIter = Event.getWithoutSuperseded(Event.getWithoutDeleted(event.getAllProductList())).iterator();
        } else {
          summaryIter = event.getProductList().iterator();
        }
        while (summaryIter.hasNext()) {
          ProductSummary nextSummary = summaryIter.next();
          if (summarySource.equalsIgnoreCase(nextSummary.getEventSource())) {
            if (summaryCode.equalsIgnoreCase(nextSummary.getEventSourceCode())) {
              // this is the event we are looking for! so stop
              // already
              return event;
            } else {
              // different event code from same source, probably a
              // different event. Don't give up yet, because
              // associate may force multiple codes from same
              // source in same event.
              sameSourceDifferentCode = true;
            }
          }
        }

        if (!sameSourceDifferentCode) {
          filteredEvents.add(event);
        }
      }
    }

    // no events found
    if (filteredEvents.size() == 0) {
      return null;
    }

    // more than one event found
    else if (filteredEvents.size() > 1) {
      ArrayList<String> matches = new ArrayList<String>();
      Iterator<Event> iter = filteredEvents.iterator();
      while (iter.hasNext()) {
        Event match = iter.next();
        matches.add(match.getEventId());
      }
      LOGGER.log(Level.WARNING,
          "Potential merge, product id=" + summary.getId().toString() + ", nearby events: " + matches.toString());

      // Return the "closest" event
      Event mostSimilar = chooseMostSimilar(summary, filteredEvents);
      if (mostSimilar != null) {
        LOGGER.log(Level.FINE,
            "Associated product id=" + summary.getId().toString() + ", to event id=" + mostSimilar.getEventId());
      }
      return mostSimilar;
    }

    // one event found
    else {
      return filteredEvents.get(0);
    }
  }

  /**
   * For the given list of events, find the one that is "closest" to the given
   * product. Similarity is calculated by first subtracting the event parameter
   * from the product parameter, normalizing between 1 and -1, then calculating
   * the Euclidean distance in the 3D space composed of the normalized lat, lon,
   * and time vectors.
   *
   * @param summary ProductSummary to compare events with
   * @param events  List of events
   * @return Event with lowest distance
   */
  protected Event chooseMostSimilar(ProductSummary summary, List<Event> events) {
    double lowest = Double.POSITIVE_INFINITY;
    Event bestMatch = null;

    if (summary.getEventLatitude() == null || summary.getEventLongitude() == null || summary.getEventTime() == null) {
      // cannot choose most similar
      if (events.size() > 0) {
        // choose first
        return events.get(0);
      } else {
        return null;
      }
    }

    // find "closest" event
    Iterator<Event> iter = events.iterator();
    while (iter.hasNext()) {
      Event event = iter.next();
      try {
        EventSummary eventSummary = event.getEventSummary();
        // First get the difference between the lat, lon, and time
        double deltaLat = summary.getEventLatitude().subtract(eventSummary.getLatitude()).doubleValue();
        double deltaLon = summary.getEventLongitude().subtract(eventSummary.getLongitude()).doubleValue();
        double deltaTime = summary.getEventTime().getTime() - eventSummary.getTime().getTime();
        // Each of the deltas will now be between the range
        // -TIME_DIFF_MILLISECONDS to +TIME_DIFF_MILLISECONDS (or
        // whatever
        // the units are). To normalize, between -1 and 1, we just need
        // to
        // divide by TIME_DIFF_MILLISECONDS
        deltaLat = deltaLat / LOCATION_DIFF_DEGREES.doubleValue();
        deltaLon = deltaLon / LOCATION_DIFF_DEGREES.doubleValue();
        deltaTime = deltaTime / TIME_DIFF_MILLISECONDS;

        // Calculate the Euclidean distance between the summary and the
        // vector representing this event
        double distance = Math.sqrt(deltaLat * deltaLat + deltaLon * deltaLon + deltaTime * deltaTime);
        if (distance < lowest) {
          lowest = distance;
          bestMatch = event;
        }
      } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Exception checking for most similar event", e);
        // only log, but continue processing
        if (bestMatch == null) {
          // pick an event, but don't update "lowest"
          bestMatch = event;
        }
      }
    }

    return bestMatch;
  }

  /**
   * Check if two events are associated to each other.
   *
   * Reasons events may be considered disassociated:
   * <ol>
   * <li>Share a common EVENTSOURCE with different EVENTSOURCECODE.</li>
   * <li>Either has a disassociate product for the other.</li>
   * <li>Preferred location in space and time is NOT nearby, and no other reason
   * to associate.</li>
   * </ol>
   *
   * Reasons events may be considered associated:
   * <ol>
   * <li>Share a common EVENTID</li>
   * <li>Either has an associate product for the other.</li>
   * <li>Their preferred location in space and time is nearby.</li>
   * </ol>
   *
   * @param event1 candidate event to test.
   * @param event2 candidate event to test.
   * @return true if associated, false otherwise.
   */
  @Override
  public boolean eventsAssociated(Event event1, Event event2) {

    // ---------------------------------------------------------//
    // -- Is there an explicit association or disassocation? -- //
    // ---------------------------------------------------------//

    // check disassociation first
    if (event1.hasDisassociateProduct(event2) || event2.hasDisassociateProduct(event1)) {
      // explicitly disassociated
      return false;
    }

    // associate overrides usual event source rules.
    if (event1.hasAssociateProduct(event2) || event2.hasAssociateProduct(event1)) {
      // explicitly associated
      return true;
    }

    EventSummary event1Summary = event1.getEventSummary();
    EventSummary event2Summary = event2.getEventSummary();

    // ---------------------------------- //
    // -- Do events share an eventid ? -- //
    // ---------------------------------- //
    // this check happens after associate and disassociate to allow two
    // events from the same source to be forced to associate
    // (bad network, bad)

    // THIS CHECKS PREFERRED EVENT ID
    // if source is same, check code
    String event1Source = event1Summary.getSource();
    String event2Source = event2Summary.getSource();
    if (event1Source != null && event2Source != null && event1Source.equalsIgnoreCase(event2Source)) {
      String event1Code = event1Summary.getSourceCode();
      String event2Code = event2Summary.getSourceCode();
      // this is somewhat implied, (preferred source+code are
      // combination) but be safe anyways
      if (event1Code != null && event2Code != null) {
        if (event1Code.equalsIgnoreCase(event2Code)) {
          // same event id
          return true;
        } else {
          // different event id from same source
          return false;
        }
      }
    }

    // THIS CHECKS NON-PREFERRED EVENT IDS Map<String, String>
    // ignore deleted sub events for this comparison
    Map<String, List<String>> event1Codes = event1.getAllEventCodes(false);
    Map<String, List<String>> event2Codes = event2.getAllEventCodes(false);
    Set<String> commonSources = event1Codes.keySet();
    commonSources.retainAll(event2Codes.keySet());

    Iterator<String> eventSourceIter = commonSources.iterator();
    while (eventSourceIter.hasNext()) {
      String source = eventSourceIter.next();
      List<String> event1SourceCodes = event1Codes.get(source);
      List<String> event2SourceCodes = event2Codes.get(source);

      Iterator<String> iter = event1SourceCodes.iterator();
      while (iter.hasNext()) {
        if (!event2SourceCodes.contains(iter.next())) {
          return false;
        }
      }

      iter = event1SourceCodes.iterator();
      while (iter.hasNext()) {
        if (!event1SourceCodes.contains(iter.next())) {
          return false;
        }
      }
    }

    // --------------------------------------------------- //
    // -- Are event locations (lat/lon/time) "nearby" ? -- //
    // --------------------------------------------------- //
    if (queryContainsLocation(
        getLocationQuery(event1Summary.getTime(), event1Summary.getLatitude(), event1Summary.getLongitude()),
        event2Summary.getTime(), event2Summary.getLatitude(), event2Summary.getLongitude())) {
      // location matches
      return true;
    }

    return false;
  }

  /**
   * Build a ProductIndexQuery that searches based on event id.
   *
   * @param eventSource the eventSource to search
   * @param eventCode   the eventCode to search
   * @return null if eventSource or eventCode are null, otherwise a
   *         ProductIndexQuery. A returned ProductIndexQuery will have
   *         EventSearchType SEARCH_EVENT_PREFERRED and ResultType
   *         RESULT_TYPE_ALL.
   */
  @Override
  public ProductIndexQuery getEventIdQuery(final String eventSource, final String eventCode) {
    ProductIndexQuery query = null;

    if (eventSource != null && eventCode != null) {
      query = new ProductIndexQuery();
      // search all products, not just preferred (in case the preferred is
      // a delete)
      query.setEventSearchType(ProductIndexQuery.SEARCH_EVENT_PRODUCTS);
      query.setResultType(ProductIndexQuery.RESULT_TYPE_ALL);

      query.setEventSource(eventSource);
      query.setEventSourceCode(eventCode);

      query.log(LOGGER);
    }

    return query;
  }

  /**
   * Build a ProductIndexQuery that searches based on location.
   *
   *
   * @param time      the time to search around.
   * @param latitude  the latitude to search around.
   * @param longitude the longitude to search around.
   * @return null if time, latitude, or longitude are null, otherwise a
   *         ProductIndexQuery. A returned ProductIndexQuery will have
   *         EventSearchType SEARCH_EVENT_PREFERRED and ResultType
   *         RESULT_TYPE_ALL.
   */
  @Override
  public ProductIndexQuery getLocationQuery(final Date time, final BigDecimal latitude, final BigDecimal longitude) {
    ProductIndexQuery query = null;
    if (time != null && latitude != null && longitude != null) {
      query = new ProductIndexQuery();

      // search all products, not just preferred (in case the preferred is
      // a delete)
      query.setEventSearchType(ProductIndexQuery.SEARCH_EVENT_PREFERRED);
      query.setResultType(ProductIndexQuery.RESULT_TYPE_ALL);

      // time
      query.setMinEventTime(new Date(time.getTime() - TIME_DIFF_MILLISECONDS));
      query.setMaxEventTime(new Date(time.getTime() + TIME_DIFF_MILLISECONDS));

      // latitude
      query.setMinEventLatitude(latitude.subtract(LOCATION_DIFF_DEGREES));
      query.setMaxEventLatitude(latitude.add(LOCATION_DIFF_DEGREES));

      // longitude
      double lat = latitude.abs().doubleValue();
      if (lat < 89.0) {
        // only restrict longitude when not close to a pole...
        BigDecimal adjustedLongitudeDiff = new BigDecimal(
            LOCATION_DIFF_DEGREES.doubleValue() / Math.cos(Math.toRadians(lat)));
        query.setMinEventLongitude(longitude.subtract(adjustedLongitudeDiff));
        query.setMaxEventLongitude(longitude.add(adjustedLongitudeDiff));

        /* make sure to compare across date/time line */
        BigDecimal minLon = query.getMinEventLongitude();
        BigDecimal maxLon = query.getMaxEventLongitude();

        // Normalize the longitudes between -180 and 180
        query.setMinEventLongitude(JDBCProductIndex.normalizeLongitude(minLon));
        query.setMaxEventLongitude(JDBCProductIndex.normalizeLongitude(maxLon));

      }

      query.log(LOGGER);
    }

    return query;
  }

  /**
   * Check if a location would be matched by a ProductIndexQuery.
   *
   * @param query     location query
   * @param time      time to check
   * @param latitude  latitude to check
   * @param longitude longitude to check
   * @return false if query, time, latitude, or longitude are null, or if min/max
   *         time, latitude, longitude are set and do not match time, latitude, or
   *         longitude. otherwise, true.
   */
  protected boolean queryContainsLocation(final ProductIndexQuery query, final Date time, final BigDecimal latitude,
      final BigDecimal longitude) {

    if (query == null || time == null || latitude == null || longitude == null) {
      // no query or location? no contains
      return false;
    }

    if (query.getMinEventTime() != null && query.getMinEventTime().after(time)) {
      // time too early
      return false;
    }
    if (query.getMaxEventTime() != null && query.getMaxEventTime().before(time)) {
      // time too late
      return false;
    }

    if (query.getMinEventLatitude() != null && query.getMinEventLatitude().compareTo(latitude) > 0) {
      // latitude too small
      return false;
    }
    if (query.getMaxEventLatitude() != null && query.getMaxEventLatitude().compareTo(latitude) < 0) {
      // latitude too large
      return false;
    }

    if (query.getMinEventLongitude() != null && query.getMaxEventLongitude() != null) {

      /*
       * longitude range check for min & max longitude when the locationQuery spans
       * the date line
       */
      if (query.getMinEventLongitude().compareTo(query.getMaxEventLongitude()) > 0) {

        boolean inBounds = false;

        // MAX:: getMaxLongitude < longitude <= -180
        if (longitude.compareTo(query.getMaxEventLongitude()) < 0 && longitude.compareTo(new BigDecimal("-180")) >= 0) {
          inBounds = true;
        }

        // MIN:: 180 >= longitude > getMinEventLongitude
        if (longitude.compareTo(query.getMinEventLongitude()) > 0 && longitude.compareTo(new BigDecimal("180")) <= 0) {
          inBounds = true;
        }

        if (!inBounds) {
          return false;
        }

      } else {

        if (query.getMinEventLongitude().compareTo(longitude) > 0) {
          // longitude too small
          return false;
        }
        if (query.getMaxEventLongitude().compareTo(longitude) < 0) {
          // longitude too large
          return false;
        }
      }
    }

    // must contain location
    return true;
  }
}