DefaultAssociator.java

  1. /*
  2.  * DefaultAssociator
  3.  */
  4. package gov.usgs.earthquake.indexer;

  5. import java.math.BigDecimal;
  6. import java.util.ArrayList;
  7. import java.util.Date;
  8. import java.util.Iterator;
  9. import java.util.LinkedList;
  10. import java.util.List;
  11. import java.util.Map;
  12. import java.util.Set;

  13. import java.util.logging.Level;
  14. import java.util.logging.Logger;

  15. /**
  16.  * Utilities for associating events.
  17.  *
  18.  * Based on the QDM EQEventsUtils class.
  19.  */
  20. public class DefaultAssociator implements Associator {

  21.   private static final Logger LOGGER = Logger.getLogger(DefaultAssociator.class.getName());

  22.   // time
  23.   /** Distance between related events in time, in milliseconds. */
  24.   public static final long TIME_DIFF_MILLISECONDS = 16 * 1000;

  25.   // space
  26.   /** Distance between related events in space, in kilometers. */
  27.   public static final BigDecimal LOCATION_DIFF_KILOMETER = new BigDecimal(100);

  28.   /** Number of kilometers in a degree at the equator. */
  29.   public static final BigDecimal KILOMETERS_PER_DEGREE = new BigDecimal("111.12");

  30.   /**
  31.    * Distance between related events latitude, in degrees.
  32.    *
  33.    * This is based on the max number of kilometers per degree, and provides the
  34.    * maximum latitude separation (assuming events share a longitude).
  35.    *
  36.    * Used as a pre-filter before more expensive checks.
  37.    */
  38.   public static final BigDecimal LOCATION_DIFF_DEGREES = new BigDecimal(
  39.       LOCATION_DIFF_KILOMETER.doubleValue() / KILOMETERS_PER_DEGREE.doubleValue());

  40.   /**
  41.    * Build an index search that searches for associated products. Products are
  42.    * considered associated if the eventid matches or their location is within a
  43.    * certain distance.
  44.    */
  45.   public SearchRequest getSearchRequest(ProductSummary summary) {
  46.     SearchRequest request = new SearchRequest();

  47.     // Order is important here. The eventId query must be added first
  48.     ProductIndexQuery eventIdQuery = getEventIdQuery(summary.getEventSource(), summary.getEventSourceCode());
  49.     if (eventIdQuery != null) {
  50.       request.addQuery(new EventDetailQuery(eventIdQuery));
  51.     }

  52.     // Now a query that looks for location
  53.     ProductIndexQuery locationQuery = getLocationQuery(summary.getEventTime(), summary.getEventLatitude(),
  54.         summary.getEventLongitude());
  55.     if (locationQuery != null) {
  56.       request.addQuery(new EventDetailQuery(locationQuery));
  57.     }

  58.     return request;
  59.   }

  60.   /**
  61.    * Choose and return the most closely associated event.
  62.    *
  63.    * @param events  a list of candidate events.
  64.    * @param summary the summary being associated.
  65.    * @return the best match event from the list of events.
  66.    */
  67.   public Event chooseEvent(final List<Event> events, final ProductSummary summary) {
  68.     List<Event> filteredEvents = new LinkedList<Event>();

  69.     // remove events that are from the same source with a different code
  70.     String summarySource = summary.getEventSource();
  71.     String summaryCode = summary.getEventSourceCode();
  72.     if (summarySource == null || summaryCode == null) {
  73.       // can't check if same source with different code
  74.       filteredEvents = events;
  75.     } else {
  76.       // try to associate by event id
  77.       Iterator<Event> iter = events.iterator();
  78.       while (iter.hasNext()) {
  79.         Event event = iter.next();

  80.         boolean sameSourceDifferentCode = false;
  81.         Iterator<ProductSummary> summaryIter;

  82.         if (event.isDeleted()) {
  83.           // ignore delete products before checking
  84.           summaryIter = Event.getWithoutSuperseded(Event.getWithoutDeleted(event.getAllProductList())).iterator();
  85.         } else {
  86.           summaryIter = event.getProductList().iterator();
  87.         }
  88.         while (summaryIter.hasNext()) {
  89.           ProductSummary nextSummary = summaryIter.next();
  90.           if (summarySource.equalsIgnoreCase(nextSummary.getEventSource())) {
  91.             if (summaryCode.equalsIgnoreCase(nextSummary.getEventSourceCode())) {
  92.               // this is the event we are looking for! so stop
  93.               // already
  94.               return event;
  95.             } else {
  96.               // different event code from same source, probably a
  97.               // different event. Don't give up yet, because
  98.               // associate may force multiple codes from same
  99.               // source in same event.
  100.               sameSourceDifferentCode = true;
  101.             }
  102.           }
  103.         }

  104.         if (!sameSourceDifferentCode) {
  105.           filteredEvents.add(event);
  106.         }
  107.       }
  108.     }

  109.     // no events found
  110.     if (filteredEvents.size() == 0) {
  111.       return null;
  112.     }

  113.     // more than one event found
  114.     else if (filteredEvents.size() > 1) {
  115.       ArrayList<String> matches = new ArrayList<String>();
  116.       Iterator<Event> iter = filteredEvents.iterator();
  117.       while (iter.hasNext()) {
  118.         Event match = iter.next();
  119.         matches.add(match.getEventId());
  120.       }
  121.       LOGGER.log(Level.WARNING,
  122.           "Potential merge, product id=" + summary.getId().toString() + ", nearby events: " + matches.toString());

  123.       // Return the "closest" event
  124.       Event mostSimilar = chooseMostSimilar(summary, filteredEvents);
  125.       if (mostSimilar != null) {
  126.         LOGGER.log(Level.FINE,
  127.             "Associated product id=" + summary.getId().toString() + ", to event id=" + mostSimilar.getEventId());
  128.       }
  129.       return mostSimilar;
  130.     }

  131.     // one event found
  132.     else {
  133.       return filteredEvents.get(0);
  134.     }
  135.   }

  136.   /**
  137.    * For the given list of events, find the one that is "closest" to the given
  138.    * product. Similarity is calculated by first subtracting the event parameter
  139.    * from the product parameter, normalizing between 1 and -1, then calculating
  140.    * the Euclidean distance in the 3D space composed of the normalized lat, lon,
  141.    * and time vectors.
  142.    *
  143.    * @param summary ProductSummary to compare events with
  144.    * @param events  List of events
  145.    * @return Event with lowest distance
  146.    */
  147.   protected Event chooseMostSimilar(ProductSummary summary, List<Event> events) {
  148.     double lowest = Double.POSITIVE_INFINITY;
  149.     Event bestMatch = null;

  150.     if (summary.getEventLatitude() == null || summary.getEventLongitude() == null || summary.getEventTime() == null) {
  151.       // cannot choose most similar
  152.       if (events.size() > 0) {
  153.         // choose first
  154.         return events.get(0);
  155.       } else {
  156.         return null;
  157.       }
  158.     }

  159.     // find "closest" event
  160.     Iterator<Event> iter = events.iterator();
  161.     while (iter.hasNext()) {
  162.       Event event = iter.next();
  163.       try {
  164.         EventSummary eventSummary = event.getEventSummary();
  165.         // First get the difference between the lat, lon, and time
  166.         double deltaLat = summary.getEventLatitude().subtract(eventSummary.getLatitude()).doubleValue();
  167.         double deltaLon = summary.getEventLongitude().subtract(eventSummary.getLongitude()).doubleValue();
  168.         double deltaTime = summary.getEventTime().getTime() - eventSummary.getTime().getTime();
  169.         // Each of the deltas will now be between the range
  170.         // -TIME_DIFF_MILLISECONDS to +TIME_DIFF_MILLISECONDS (or
  171.         // whatever
  172.         // the units are). To normalize, between -1 and 1, we just need
  173.         // to
  174.         // divide by TIME_DIFF_MILLISECONDS
  175.         deltaLat = deltaLat / LOCATION_DIFF_DEGREES.doubleValue();
  176.         deltaLon = deltaLon / LOCATION_DIFF_DEGREES.doubleValue();
  177.         deltaTime = deltaTime / TIME_DIFF_MILLISECONDS;

  178.         // Calculate the Euclidean distance between the summary and the
  179.         // vector representing this event
  180.         double distance = Math.sqrt(deltaLat * deltaLat + deltaLon * deltaLon + deltaTime * deltaTime);
  181.         if (distance < lowest) {
  182.           lowest = distance;
  183.           bestMatch = event;
  184.         }
  185.       } catch (Exception e) {
  186.         LOGGER.log(Level.WARNING, "Exception checking for most similar event", e);
  187.         // only log, but continue processing
  188.         if (bestMatch == null) {
  189.           // pick an event, but don't update "lowest"
  190.           bestMatch = event;
  191.         }
  192.       }
  193.     }

  194.     return bestMatch;
  195.   }

  196.   /**
  197.    * Check if two events are associated to each other.
  198.    *
  199.    * Reasons events may be considered disassociated:
  200.    * <ol>
  201.    * <li>Share a common EVENTSOURCE with different EVENTSOURCECODE.</li>
  202.    * <li>Either has a disassociate product for the other.</li>
  203.    * <li>Preferred location in space and time is NOT nearby, and no other reason
  204.    * to associate.</li>
  205.    * </ol>
  206.    *
  207.    * Reasons events may be considered associated:
  208.    * <ol>
  209.    * <li>Share a common EVENTID</li>
  210.    * <li>Either has an associate product for the other.</li>
  211.    * <li>Their preferred location in space and time is nearby.</li>
  212.    * </ol>
  213.    *
  214.    * @param event1 candidate event to test.
  215.    * @param event2 candidate event to test.
  216.    * @return true if associated, false otherwise.
  217.    */
  218.   @Override
  219.   public boolean eventsAssociated(Event event1, Event event2) {

  220.     // ---------------------------------------------------------//
  221.     // -- Is there an explicit association or disassocation? -- //
  222.     // ---------------------------------------------------------//

  223.     // check disassociation first
  224.     if (event1.hasDisassociateProduct(event2) || event2.hasDisassociateProduct(event1)) {
  225.       // explicitly disassociated
  226.       return false;
  227.     }

  228.     // associate overrides usual event source rules.
  229.     if (event1.hasAssociateProduct(event2) || event2.hasAssociateProduct(event1)) {
  230.       // explicitly associated
  231.       return true;
  232.     }

  233.     EventSummary event1Summary = event1.getEventSummary();
  234.     EventSummary event2Summary = event2.getEventSummary();

  235.     // ---------------------------------- //
  236.     // -- Do events share an eventid ? -- //
  237.     // ---------------------------------- //
  238.     // this check happens after associate and disassociate to allow two
  239.     // events from the same source to be forced to associate
  240.     // (bad network, bad)

  241.     // THIS CHECKS PREFERRED EVENT ID
  242.     // if source is same, check code
  243.     String event1Source = event1Summary.getSource();
  244.     String event2Source = event2Summary.getSource();
  245.     if (event1Source != null && event2Source != null && event1Source.equalsIgnoreCase(event2Source)) {
  246.       String event1Code = event1Summary.getSourceCode();
  247.       String event2Code = event2Summary.getSourceCode();
  248.       // this is somewhat implied, (preferred source+code are
  249.       // combination) but be safe anyways
  250.       if (event1Code != null && event2Code != null) {
  251.         if (event1Code.equalsIgnoreCase(event2Code)) {
  252.           // same event id
  253.           return true;
  254.         } else {
  255.           // different event id from same source
  256.           return false;
  257.         }
  258.       }
  259.     }

  260.     // THIS CHECKS NON-PREFERRED EVENT IDS Map<String, String>
  261.     // ignore deleted sub events for this comparison
  262.     Map<String, List<String>> event1Codes = event1.getAllEventCodes(false);
  263.     Map<String, List<String>> event2Codes = event2.getAllEventCodes(false);
  264.     Set<String> commonSources = event1Codes.keySet();
  265.     commonSources.retainAll(event2Codes.keySet());

  266.     Iterator<String> eventSourceIter = commonSources.iterator();
  267.     while (eventSourceIter.hasNext()) {
  268.       String source = eventSourceIter.next();
  269.       List<String> event1SourceCodes = event1Codes.get(source);
  270.       List<String> event2SourceCodes = event2Codes.get(source);

  271.       Iterator<String> iter = event1SourceCodes.iterator();
  272.       while (iter.hasNext()) {
  273.         if (!event2SourceCodes.contains(iter.next())) {
  274.           return false;
  275.         }
  276.       }

  277.       iter = event1SourceCodes.iterator();
  278.       while (iter.hasNext()) {
  279.         if (!event1SourceCodes.contains(iter.next())) {
  280.           return false;
  281.         }
  282.       }
  283.     }

  284.     // --------------------------------------------------- //
  285.     // -- Are event locations (lat/lon/time) "nearby" ? -- //
  286.     // --------------------------------------------------- //
  287.     if (queryContainsLocation(
  288.         getLocationQuery(event1Summary.getTime(), event1Summary.getLatitude(), event1Summary.getLongitude()),
  289.         event2Summary.getTime(), event2Summary.getLatitude(), event2Summary.getLongitude())) {
  290.       // location matches
  291.       return true;
  292.     }

  293.     return false;
  294.   }

  295.   /**
  296.    * Build a ProductIndexQuery that searches based on event id.
  297.    *
  298.    * @param eventSource the eventSource to search
  299.    * @param eventCode   the eventCode to search
  300.    * @return null if eventSource or eventCode are null, otherwise a
  301.    *         ProductIndexQuery. A returned ProductIndexQuery will have
  302.    *         EventSearchType SEARCH_EVENT_PREFERRED and ResultType
  303.    *         RESULT_TYPE_ALL.
  304.    */
  305.   @Override
  306.   public ProductIndexQuery getEventIdQuery(final String eventSource, final String eventCode) {
  307.     ProductIndexQuery query = null;

  308.     if (eventSource != null && eventCode != null) {
  309.       query = new ProductIndexQuery();
  310.       // search all products, not just preferred (in case the preferred is
  311.       // a delete)
  312.       query.setEventSearchType(ProductIndexQuery.SEARCH_EVENT_PRODUCTS);
  313.       query.setResultType(ProductIndexQuery.RESULT_TYPE_ALL);

  314.       query.setEventSource(eventSource);
  315.       query.setEventSourceCode(eventCode);

  316.       query.log(LOGGER);
  317.     }

  318.     return query;
  319.   }

  320.   /**
  321.    * Build a ProductIndexQuery that searches based on location.
  322.    *
  323.    *
  324.    * @param time      the time to search around.
  325.    * @param latitude  the latitude to search around.
  326.    * @param longitude the longitude to search around.
  327.    * @return null if time, latitude, or longitude are null, otherwise a
  328.    *         ProductIndexQuery. A returned ProductIndexQuery will have
  329.    *         EventSearchType SEARCH_EVENT_PREFERRED and ResultType
  330.    *         RESULT_TYPE_ALL.
  331.    */
  332.   @Override
  333.   public ProductIndexQuery getLocationQuery(final Date time, final BigDecimal latitude, final BigDecimal longitude) {
  334.     ProductIndexQuery query = null;
  335.     if (time != null && latitude != null && longitude != null) {
  336.       query = new ProductIndexQuery();

  337.       // search all products, not just preferred (in case the preferred is
  338.       // a delete)
  339.       query.setEventSearchType(ProductIndexQuery.SEARCH_EVENT_PREFERRED);
  340.       query.setResultType(ProductIndexQuery.RESULT_TYPE_ALL);

  341.       // time
  342.       query.setMinEventTime(new Date(time.getTime() - TIME_DIFF_MILLISECONDS));
  343.       query.setMaxEventTime(new Date(time.getTime() + TIME_DIFF_MILLISECONDS));

  344.       // latitude
  345.       query.setMinEventLatitude(latitude.subtract(LOCATION_DIFF_DEGREES));
  346.       query.setMaxEventLatitude(latitude.add(LOCATION_DIFF_DEGREES));

  347.       // longitude
  348.       double lat = latitude.abs().doubleValue();
  349.       if (lat < 89.0) {
  350.         // only restrict longitude when not close to a pole...
  351.         BigDecimal adjustedLongitudeDiff = new BigDecimal(
  352.             LOCATION_DIFF_DEGREES.doubleValue() / Math.cos(Math.toRadians(lat)));
  353.         query.setMinEventLongitude(longitude.subtract(adjustedLongitudeDiff));
  354.         query.setMaxEventLongitude(longitude.add(adjustedLongitudeDiff));

  355.         /* make sure to compare across date/time line */
  356.         BigDecimal minLon = query.getMinEventLongitude();
  357.         BigDecimal maxLon = query.getMaxEventLongitude();

  358.         // Normalize the longitudes between -180 and 180
  359.         query.setMinEventLongitude(JDBCProductIndex.normalizeLongitude(minLon));
  360.         query.setMaxEventLongitude(JDBCProductIndex.normalizeLongitude(maxLon));

  361.       }

  362.       query.log(LOGGER);
  363.     }

  364.     return query;
  365.   }

  366.   /**
  367.    * Check if a location would be matched by a ProductIndexQuery.
  368.    *
  369.    * @param query     location query
  370.    * @param time      time to check
  371.    * @param latitude  latitude to check
  372.    * @param longitude longitude to check
  373.    * @return false if query, time, latitude, or longitude are null, or if min/max
  374.    *         time, latitude, longitude are set and do not match time, latitude, or
  375.    *         longitude. otherwise, true.
  376.    */
  377.   protected boolean queryContainsLocation(final ProductIndexQuery query, final Date time, final BigDecimal latitude,
  378.       final BigDecimal longitude) {

  379.     if (query == null || time == null || latitude == null || longitude == null) {
  380.       // no query or location? no contains
  381.       return false;
  382.     }

  383.     if (query.getMinEventTime() != null && query.getMinEventTime().after(time)) {
  384.       // time too early
  385.       return false;
  386.     }
  387.     if (query.getMaxEventTime() != null && query.getMaxEventTime().before(time)) {
  388.       // time too late
  389.       return false;
  390.     }

  391.     if (query.getMinEventLatitude() != null && query.getMinEventLatitude().compareTo(latitude) > 0) {
  392.       // latitude too small
  393.       return false;
  394.     }
  395.     if (query.getMaxEventLatitude() != null && query.getMaxEventLatitude().compareTo(latitude) < 0) {
  396.       // latitude too large
  397.       return false;
  398.     }

  399.     if (query.getMinEventLongitude() != null && query.getMaxEventLongitude() != null) {

  400.       /*
  401.        * longitude range check for min & max longitude when the locationQuery spans
  402.        * the date line
  403.        */
  404.       if (query.getMinEventLongitude().compareTo(query.getMaxEventLongitude()) > 0) {

  405.         boolean inBounds = false;

  406.         // MAX:: getMaxLongitude < longitude <= -180
  407.         if (longitude.compareTo(query.getMaxEventLongitude()) < 0 && longitude.compareTo(new BigDecimal("-180")) >= 0) {
  408.           inBounds = true;
  409.         }

  410.         // MIN:: 180 >= longitude > getMinEventLongitude
  411.         if (longitude.compareTo(query.getMinEventLongitude()) > 0 && longitude.compareTo(new BigDecimal("180")) <= 0) {
  412.           inBounds = true;
  413.         }

  414.         if (!inBounds) {
  415.           return false;
  416.         }

  417.       } else {

  418.         if (query.getMinEventLongitude().compareTo(longitude) > 0) {
  419.           // longitude too small
  420.           return false;
  421.         }
  422.         if (query.getMaxEventLongitude().compareTo(longitude) < 0) {
  423.           // longitude too large
  424.           return false;
  425.         }
  426.       }
  427.     }

  428.     // must contain location
  429.     return true;
  430.   }
  431. }