001/**
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.activemq.transport.discovery.multicast;
019
020import java.io.IOException;
021import java.net.DatagramPacket;
022import java.net.InetAddress;
023import java.net.InetSocketAddress;
024import java.net.InterfaceAddress;
025import java.net.MulticastSocket;
026import java.net.NetworkInterface;
027import java.net.SocketAddress;
028import java.net.SocketException;
029import java.net.SocketTimeoutException;
030import java.net.URI;
031import java.util.ArrayList;
032import java.util.Enumeration;
033import java.util.Iterator;
034import java.util.List;
035import java.util.Map;
036import java.util.concurrent.ConcurrentHashMap;
037import java.util.concurrent.ExecutorService;
038import java.util.concurrent.LinkedBlockingQueue;
039import java.util.concurrent.ThreadFactory;
040import java.util.concurrent.ThreadPoolExecutor;
041import java.util.concurrent.TimeUnit;
042import java.util.concurrent.atomic.AtomicBoolean;
043
044import org.apache.activemq.command.DiscoveryEvent;
045import org.apache.activemq.transport.discovery.DiscoveryAgent;
046import org.apache.activemq.transport.discovery.DiscoveryListener;
047import org.apache.activemq.util.ThreadPoolUtils;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051/**
052 * A {@link DiscoveryAgent} using a multicast address and heartbeat packets
053 * encoded using any wireformat, but openwire by default.
054 * 
055 * 
056 */
057public class MulticastDiscoveryAgent implements DiscoveryAgent, Runnable {
058
059    public static final String DEFAULT_DISCOVERY_URI_STRING = "multicast://239.255.2.3:6155";
060    public static final String DEFAULT_HOST_STR = "default"; 
061    public static final String DEFAULT_HOST_IP  = System.getProperty("activemq.partition.discovery", "239.255.2.3"); 
062    public static final int    DEFAULT_PORT  = 6155; 
063        
064    private static final Logger LOG = LoggerFactory.getLogger(MulticastDiscoveryAgent.class);
065    private static final String TYPE_SUFFIX = "ActiveMQ-4.";
066    private static final String ALIVE = "alive.";
067    private static final String DEAD = "dead.";
068    private static final String DELIMITER = "%";
069    private static final int BUFF_SIZE = 8192;
070    private static final int DEFAULT_IDLE_TIME = 500;
071    private static final int HEARTBEAT_MISS_BEFORE_DEATH = 10;
072
073    private long initialReconnectDelay = 1000 * 5;
074    private long maxReconnectDelay = 1000 * 30;
075    private long backOffMultiplier = 2;
076    private boolean useExponentialBackOff;
077    private int maxReconnectAttempts;
078
079    private int timeToLive = 1;
080    private boolean loopBackMode;
081    private Map<String, RemoteBrokerData> brokersByService = new ConcurrentHashMap<String, RemoteBrokerData>();
082    private String group = "default";
083    private URI discoveryURI;
084    private InetAddress inetAddress;
085    private SocketAddress sockAddress;
086    private DiscoveryListener discoveryListener;
087    private String selfService;
088    private MulticastSocket mcast;
089    private Thread runner;
090    private long keepAliveInterval = DEFAULT_IDLE_TIME;
091    private String mcInterface;
092    private String mcNetworkInterface;
093    private String mcJoinNetworkInterface;
094    private long lastAdvertizeTime;
095    private AtomicBoolean started = new AtomicBoolean(false);
096    private boolean reportAdvertizeFailed = true;
097    private ExecutorService executor = null;
098
099    class RemoteBrokerData extends DiscoveryEvent {
100        long lastHeartBeat;
101        long recoveryTime;
102        int failureCount;
103        boolean failed;
104
105        public RemoteBrokerData(String brokerName, String service) {
106            super(service);
107            setBrokerName(brokerName);
108            this.lastHeartBeat = System.currentTimeMillis();
109        }
110
111        public synchronized void updateHeartBeat() {
112            lastHeartBeat = System.currentTimeMillis();
113
114            // Consider that the broker recovery has succeeded if it has not
115            // failed in 60 seconds.
116            if (!failed && failureCount > 0 && (lastHeartBeat - recoveryTime) > 1000 * 60) {
117                if (LOG.isDebugEnabled()) {
118                    LOG.debug("I now think that the " + serviceName + " service has recovered.");
119                }
120                failureCount = 0;
121                recoveryTime = 0;
122            }
123        }
124
125        public synchronized long getLastHeartBeat() {
126            return lastHeartBeat;
127        }
128
129        public synchronized boolean markFailed() {
130            if (!failed) {
131                failed = true;
132                failureCount++;
133
134                long reconnectDelay;
135                if (!useExponentialBackOff) {
136                    reconnectDelay = initialReconnectDelay;
137                } else {
138                    reconnectDelay = (long)Math.pow(backOffMultiplier, failureCount);
139                    if (reconnectDelay > maxReconnectDelay) {
140                        reconnectDelay = maxReconnectDelay;
141                    }
142                }
143
144                if (LOG.isDebugEnabled()) {
145                    LOG.debug("Remote failure of " + serviceName + " while still receiving multicast advertisements.  Advertising events will be suppressed for " + reconnectDelay
146                              + " ms, the current failure count is: " + failureCount);
147                }
148
149                recoveryTime = System.currentTimeMillis() + reconnectDelay;
150                return true;
151            }
152            return false;
153        }
154
155        /**
156         * @return true if this broker is marked failed and it is now the right
157         *         time to start recovery.
158         */
159        public synchronized boolean doRecovery() {
160            if (!failed) {
161                return false;
162            }
163
164            // Are we done trying to recover this guy?
165            if (maxReconnectAttempts > 0 && failureCount > maxReconnectAttempts) {
166                if (LOG.isDebugEnabled()) {
167                    LOG.debug("Max reconnect attempts of the " + serviceName + " service has been reached.");
168                }
169                return false;
170            }
171
172            // Is it not yet time?
173            if (System.currentTimeMillis() < recoveryTime) {
174                return false;
175            }
176
177            if (LOG.isDebugEnabled()) {
178                LOG.debug("Resuming event advertisement of the " + serviceName + " service.");
179            }
180            failed = false;
181            return true;
182        }
183
184        public boolean isFailed() {
185            return failed;
186        }
187    }
188
189    /**
190     * Set the discovery listener
191     * 
192     * @param listener
193     */
194    public void setDiscoveryListener(DiscoveryListener listener) {
195        this.discoveryListener = listener;
196    }
197
198    /**
199     * register a service
200     */
201    public void registerService(String name) throws IOException {
202        this.selfService = name;
203        if (started.get()) {
204            doAdvertizeSelf();
205        }
206    }
207
208    /**
209     * @return Returns the loopBackMode.
210     */
211    public boolean isLoopBackMode() {
212        return loopBackMode;
213    }
214
215    /**
216     * @param loopBackMode The loopBackMode to set.
217     */
218    public void setLoopBackMode(boolean loopBackMode) {
219        this.loopBackMode = loopBackMode;
220    }
221
222    /**
223     * @return Returns the timeToLive.
224     */
225    public int getTimeToLive() {
226        return timeToLive;
227    }
228
229    /**
230     * @param timeToLive The timeToLive to set.
231     */
232    public void setTimeToLive(int timeToLive) {
233        this.timeToLive = timeToLive;
234    }
235
236    /**
237     * @return the discoveryURI
238     */
239    public URI getDiscoveryURI() {
240        return discoveryURI;
241    }
242
243    /**
244     * Set the discoveryURI
245     * 
246     * @param discoveryURI
247     */
248    public void setDiscoveryURI(URI discoveryURI) {
249        this.discoveryURI = discoveryURI;
250    }
251
252    public long getKeepAliveInterval() {
253        return keepAliveInterval;
254    }
255
256    public void setKeepAliveInterval(long keepAliveInterval) {
257        this.keepAliveInterval = keepAliveInterval;
258    }
259    
260    public void setInterface(String mcInterface) {
261        this.mcInterface = mcInterface;
262    }
263    
264    public void setNetworkInterface(String mcNetworkInterface) {
265        this.mcNetworkInterface = mcNetworkInterface;    
266    }
267    
268    public void setJoinNetworkInterface(String mcJoinNetwrokInterface) {
269        this.mcJoinNetworkInterface = mcJoinNetwrokInterface;
270    }
271    
272    /**
273     * start the discovery agent
274     * 
275     * @throws Exception
276     */
277    public void start() throws Exception {
278        
279        if (started.compareAndSet(false, true)) {               
280                                
281            if (group == null || group.length() == 0) {
282                throw new IOException("You must specify a group to discover");
283            }
284            String type = getType();
285            if (!type.endsWith(".")) {
286                LOG.warn("The type '" + type + "' should end with '.' to be a valid Discovery type");
287                type += ".";
288            }
289            
290            if (discoveryURI == null) {
291                discoveryURI = new URI(DEFAULT_DISCOVERY_URI_STRING);
292            }
293            
294            if (LOG.isTraceEnabled()) {
295                LOG.trace("start - discoveryURI = " + discoveryURI);
296            }
297
298            String myHost = discoveryURI.getHost();
299            int    myPort = discoveryURI.getPort();
300
301            if (DEFAULT_HOST_STR.equals(myHost)) {
302                myHost = DEFAULT_HOST_IP;
303            }
304
305            if (myPort < 0) {
306                myPort = DEFAULT_PORT;
307            }
308
309            if (LOG.isTraceEnabled()) {
310                LOG.trace("start - myHost = " + myHost);
311                LOG.trace("start - myPort = " + myPort);
312                LOG.trace("start - group  = " + group);
313                LOG.trace("start - interface  = " + mcInterface);
314                LOG.trace("start - network interface  = " + mcNetworkInterface);
315                LOG.trace("start - join network interface  = " + mcJoinNetworkInterface);
316            }
317
318            this.inetAddress = InetAddress.getByName(myHost);
319            this.sockAddress = new InetSocketAddress(this.inetAddress, myPort);
320            mcast = new MulticastSocket(myPort);
321            mcast.setLoopbackMode(loopBackMode);
322            mcast.setTimeToLive(getTimeToLive());
323            if (mcJoinNetworkInterface != null) {
324                mcast.joinGroup(sockAddress, NetworkInterface.getByName(mcJoinNetworkInterface));
325            }
326            else {
327                mcast.setNetworkInterface(findNetworkInterface());
328                mcast.joinGroup(inetAddress);
329            }
330            mcast.setSoTimeout((int)keepAliveInterval);
331            if (mcInterface != null) {
332                mcast.setInterface(InetAddress.getByName(mcInterface));
333            }
334            if (mcNetworkInterface != null) {
335                mcast.setNetworkInterface(NetworkInterface.getByName(mcNetworkInterface));
336            }
337            runner = new Thread(this);
338            runner.setName(this.toString() + ":" + runner.getName());
339            runner.setDaemon(true);
340            runner.start();
341            doAdvertizeSelf();
342        }
343    }
344    
345    private NetworkInterface findNetworkInterface() throws SocketException {
346        Enumeration<NetworkInterface> ifcs = NetworkInterface.getNetworkInterfaces();
347        List<NetworkInterface> possibles = new ArrayList<NetworkInterface>();
348        while (ifcs.hasMoreElements()) {
349            NetworkInterface ni = ifcs.nextElement();
350            try {
351                if (ni.supportsMulticast()
352                        && ni.isUp()) {
353                    for (InterfaceAddress ia : ni.getInterfaceAddresses()) {
354                        if (ia != null && ia.getAddress() instanceof java.net.Inet4Address
355                                && !ia.getAddress().isLoopbackAddress()
356                                && (ni.getDisplayName()==null || !ni.getDisplayName().startsWith("vnic"))) {
357                            possibles.add(ni);
358                        }
359                    }
360                }
361            } catch (SocketException ignored) {}
362        }
363        return possibles.isEmpty() ? null : possibles.get(possibles.size() - 1);
364    }
365
366    /**
367     * stop the channel
368     * 
369     * @throws Exception
370     */
371    public void stop() throws Exception {
372        if (started.compareAndSet(true, false)) {
373            doAdvertizeSelf();
374            if (mcast != null) {
375                mcast.close();
376            }
377            if (runner != null) {
378                runner.interrupt();
379            }
380            if (executor != null) {
381                ThreadPoolUtils.shutdownNow(executor);
382                executor = null;
383            }
384        }
385    }
386
387    public String getType() {
388        return group + "." + TYPE_SUFFIX;
389    }
390
391    public void run() {
392        byte[] buf = new byte[BUFF_SIZE];
393        DatagramPacket packet = new DatagramPacket(buf, 0, buf.length);
394        while (started.get()) {
395            doTimeKeepingServices();
396            try {
397                mcast.receive(packet);
398                if (packet.getLength() > 0) {
399                    String str = new String(packet.getData(), packet.getOffset(), packet.getLength());
400                    processData(str);
401                }
402            } catch (SocketTimeoutException se) {
403                // ignore
404            } catch (IOException e) {
405                if (started.get()) {
406                    LOG.error("failed to process packet: " + e);
407                }
408            }
409        }
410    }
411
412    private void processData(String str) {
413        if (discoveryListener != null) {
414            if (str.startsWith(getType())) {
415                String payload = str.substring(getType().length());
416                if (payload.startsWith(ALIVE)) {
417                    String brokerName = getBrokerName(payload.substring(ALIVE.length()));
418                    String service = payload.substring(ALIVE.length() + brokerName.length() + 2);
419                    processAlive(brokerName, service);
420                } else {
421                    String brokerName = getBrokerName(payload.substring(DEAD.length()));
422                    String service = payload.substring(DEAD.length() + brokerName.length() + 2);
423                    processDead(service);
424                }
425            }
426        }
427    }
428
429    private void doTimeKeepingServices() {
430        if (started.get()) {
431            long currentTime = System.currentTimeMillis();
432            if (currentTime < lastAdvertizeTime || ((currentTime - keepAliveInterval) > lastAdvertizeTime)) {
433                doAdvertizeSelf();
434                lastAdvertizeTime = currentTime;
435            }
436            doExpireOldServices();
437        }
438    }
439
440    private void doAdvertizeSelf() {
441        if (selfService != null) {
442            String payload = getType();
443            payload += started.get() ? ALIVE : DEAD;
444            payload += DELIMITER + "localhost" + DELIMITER;
445            payload += selfService;
446            try {
447                byte[] data = payload.getBytes();
448                DatagramPacket packet = new DatagramPacket(data, 0, data.length, sockAddress);
449                mcast.send(packet);
450            } catch (IOException e) {
451                // If a send fails, chances are all subsequent sends will fail
452                // too.. No need to keep reporting the
453                // same error over and over.
454                if (reportAdvertizeFailed) {
455                    reportAdvertizeFailed = false;
456                    LOG.error("Failed to advertise our service: " + payload, e);
457                    if ("Operation not permitted".equals(e.getMessage())) {
458                        LOG.error("The 'Operation not permitted' error has been know to be caused by improper firewall/network setup.  "
459                                  + "Please make sure that the OS is properly configured to allow multicast traffic over: " + mcast.getLocalAddress());
460                    }
461                }
462            }
463        }
464    }
465
466    private void processAlive(String brokerName, String service) {
467        if (selfService == null || !service.equals(selfService)) {
468            RemoteBrokerData data = brokersByService.get(service);
469            if (data == null) {
470                data = new RemoteBrokerData(brokerName, service);
471                brokersByService.put(service, data);      
472                fireServiceAddEvent(data);
473                doAdvertizeSelf();
474            } else {
475                data.updateHeartBeat();
476                if (data.doRecovery()) {
477                    fireServiceAddEvent(data);
478                }
479            }
480        }
481    }
482
483    private void processDead(String service) {
484        if (!service.equals(selfService)) {
485            RemoteBrokerData data = brokersByService.remove(service);
486            if (data != null && !data.isFailed()) {
487                fireServiceRemovedEvent(data);
488            }
489        }
490    }
491
492    private void doExpireOldServices() {
493        long expireTime = System.currentTimeMillis() - (keepAliveInterval * HEARTBEAT_MISS_BEFORE_DEATH); 
494        for (Iterator<RemoteBrokerData> i = brokersByService.values().iterator(); i.hasNext();) {
495            RemoteBrokerData data = i.next();
496            if (data.getLastHeartBeat() < expireTime) {
497                processDead(data.getServiceName());
498            }
499        }
500    }
501
502    private String getBrokerName(String str) {
503        String result = null;
504        int start = str.indexOf(DELIMITER);
505        if (start >= 0) {
506            int end = str.indexOf(DELIMITER, start + 1);
507            result = str.substring(start + 1, end);
508        }
509        return result;
510    }
511
512    public void serviceFailed(DiscoveryEvent event) throws IOException {
513        RemoteBrokerData data = brokersByService.get(event.getServiceName());
514        if (data != null && data.markFailed()) {
515            fireServiceRemovedEvent(data);
516        }
517    }
518
519    private void fireServiceRemovedEvent(final RemoteBrokerData data) {
520        if (discoveryListener != null && started.get()) {
521            // Have the listener process the event async so that
522            // he does not block this thread since we are doing time sensitive
523            // processing of events.
524            getExecutor().execute(new Runnable() {
525                public void run() {
526                    DiscoveryListener discoveryListener = MulticastDiscoveryAgent.this.discoveryListener;
527                    if (discoveryListener != null) {
528                        discoveryListener.onServiceRemove(data);
529                    }
530                }
531            });
532        }
533    }
534
535    private void fireServiceAddEvent(final RemoteBrokerData data) {
536        if (discoveryListener != null && started.get()) {
537
538            // Have the listener process the event async so that
539            // he does not block this thread since we are doing time sensitive
540            // processing of events.
541            getExecutor().execute(new Runnable() {
542                public void run() {
543                    DiscoveryListener discoveryListener = MulticastDiscoveryAgent.this.discoveryListener;
544                    if (discoveryListener != null) {
545                        discoveryListener.onServiceAdd(data);
546                    }
547                }
548            });
549        }
550    }
551
552    private ExecutorService getExecutor() {
553        if (executor == null) {
554            final String threadName = "Notifier-" + this.toString();
555            executor = new ThreadPoolExecutor(1, 1, 30, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new ThreadFactory() {
556                public Thread newThread(Runnable runable) {
557                    Thread t = new Thread(runable,  threadName);
558                    t.setDaemon(true);
559                    return t;
560                }
561            });
562        }
563        return executor;
564    }
565
566    public long getBackOffMultiplier() {
567        return backOffMultiplier;
568    }
569
570    public void setBackOffMultiplier(long backOffMultiplier) {
571        this.backOffMultiplier = backOffMultiplier;
572    }
573
574    public long getInitialReconnectDelay() {
575        return initialReconnectDelay;
576    }
577
578    public void setInitialReconnectDelay(long initialReconnectDelay) {
579        this.initialReconnectDelay = initialReconnectDelay;
580    }
581
582    public int getMaxReconnectAttempts() {
583        return maxReconnectAttempts;
584    }
585
586    public void setMaxReconnectAttempts(int maxReconnectAttempts) {
587        this.maxReconnectAttempts = maxReconnectAttempts;
588    }
589
590    public long getMaxReconnectDelay() {
591        return maxReconnectDelay;
592    }
593
594    public void setMaxReconnectDelay(long maxReconnectDelay) {
595        this.maxReconnectDelay = maxReconnectDelay;
596    }
597
598    public boolean isUseExponentialBackOff() {
599        return useExponentialBackOff;
600    }
601
602    public void setUseExponentialBackOff(boolean useExponentialBackOff) {
603        this.useExponentialBackOff = useExponentialBackOff;
604    }
605
606    public void setGroup(String group) {
607        this.group = group;
608    }
609    
610    @Override
611    public String toString() {
612        return  "MulticastDiscoveryAgent-"
613            + (selfService != null ? "advertise:" + selfService : "listener:" + this.discoveryListener);
614    }
615}