1. J C Sancho, Antonio Robles and Jose Duato. A flexible routing scheme for networks of workstations. 2000, 260 - 7. BibTeX

    @inproceedings{ 6977552,
    	author = "Sancho, J. C. and Robles, Antonio and Duato, Jose",
    	abstract = "NOW are arranged as a switch-based network which allows the layout of both regular and irregular topologies. However, the irregular pattern interconnect makes routing and deadlock avoidance quite complicated. Current proposals use the up*/down* routing algorithm to remove cyclic dependencies between channels and avoid deadlock. Recently, a simple and effective methodology to compute up*/down* routing tables has been proposed by us. The resulting routing algorithm is very effective in irregular topologies. However, its behavior is very poor in regular networks with orthogonal dimensions. Therefore, we propose a more flexible routing scheme that is effective in both regular and irregular topologies. Unlike up*/down* routing algorithms, the proposed routing algorithm breaks cycles at different nodes for each direction in the cycle, thus providing better traffic balancing than that provided by up*/down* routing algorithms. Evaluation results modeling a Myrinet network show that the new routing algorithm increases throughput with respect to the original up*/down* routing algorithm by a factor of up to 3.5 for regular networks, also maintaining the performance of the improved up*/down* routing scheme proposed in Sancho et al., (2000), when applied to irregular networks",
    	address = "Berlin, Germany",
    	booktitle = "High Performance Computing. Third International Symposium, ISHPC 2000. Proceedings",
    	keywords = "concurrency control;network routing;network topology;performance evaluation;workstation clusters;",
    	note = "networks of workstations;routing scheme;NOW;regular topologies;irregular topologies;deadlock avoidance;traffic balancing;Myrinet network;performance;",
    	pages = "260--267",
    	series = "Lecture Notes in Computer Science",
    	title = "A flexible routing scheme for networks of workstations",
    	volume = 1940,
    	year = 2000
    }
    
  2. Maria E Gomez and Vicente Santonja. A new approach in the analysis and modeling of disk access patterns. 2000, 172 - 7. URL BibTeX

    @inproceedings{ 6598374,
    	author = "Gomez, Maria E. and Santonja, Vicente",
    	abstract = "While in previous work we have demonstrated that disk arrival patterns are consistent with self-similarity and have provided a physical explanation for the self-similar phenomenon in disk arrival patterns, the authors now deal with the analysis and modeling of disk access patterns. We provide visual and mathematical evidence showing that the same bursty behavior observed in the time series can also be observed in the spatial series formed by the starting address of the accessed blocks. Moreover, we demonstrate that the clustering of accesses in areas of the disk has self-similar characteristics. Next, we demonstrate that the observed behavior can be explained using the same self-similar property used in the analysis of arrival patterns and applying the same physical explanation",
    	address = "Piscataway, NJ, USA",
    	booktitle = "2000 IEEE International Symposium on Performance Analysis of Systems and Software. ISPASS (Cat. No.00EX422)",
    	doi = "10.1109/ISPASS.2000.842297",
    	keywords = "disc storage;input-output programs;storage management;time series;",
    	note = "disk access pattern analysis;disk arrival patterns;self-similarity;self-similar phenomenon;mathematical evidence;bursty behavior;time series;spatial series;starting address;accessed blocks;self-similar property;arrival patterns;physical explanation;",
    	pages = "172--177",
    	title = "A new approach in the analysis and modeling of disk access patterns",
    	url = "http://dx.doi.org/10.1109/ISPASS.2000.842297",
    	year = 2000
    }
    
  3. Maria E Gomez and Vicente Santonja. A new approach in the modeling and generation of synthetic disk workload. In Modeling, Analysis and Simulation of Computer and Telecommunication Systems, 2000. Proceedings. 8th International Symposium on. August 2000, 199 - 206. URL, DOI BibTeX

    @inproceedings{ 6735484,
    	author = "Gomez, Maria E. and Santonja, Vicente",
    	abstract = "Shows a new approach to generate synthetic disk workload. The work presented is based on previous results obtained from the analysis of real disk traces. The proposed disk workload generation model can capture the heavy-tailed behavior of real disk workload, a critical feature to reproduce disk subsystem congestion. The generator provides synthetic workload much more accurately than commonly-used models. Since the workload plays a critical role in performance evaluations, having a more accurate disk workload generator is important for storage researchers in order to obtain fair and unbiased performance predictions",
    	address = "San Francisco, CA, USA",
    	booktitle = "Modeling, Analysis and Simulation of Computer and Telecommunication Systems, 2000. Proceedings. 8th International Symposium on",
    	doi = "10.1109/MASCOT.2000.876445",
    	isbn = "0-7695-0728-X",
    	journal = "Proceedings 8th International Symposium on Modeling, Analysis and Simulation of Computer and Telecommunication Systems (Cat. No.PR00728)",
    	keywords = "disc storage;performance evaluation;",
    	month = aug,
    	note = "synthetic disk workload modelling;synthetic disk workload generation;disk traces;disk workload generation model;heavy-tailed behavior;disk subsystem congestion;performance evaluation;storage research;unbiased performance prediction;",
    	pages = "199--206",
    	title = "A new approach in the modeling and generation of synthetic disk workload",
    	url = "http://dx.doi.org/10.1109/MASCOT.2000.876445",
    	year = 2000
    }
    
  4. G Bernabe, J Gonzalez, J M Garcia and Jose Duato. A new lossy 3-D wavelet transform for high-quality compression of medical video. 2000, 226 - 31. URL BibTeX

    @inproceedings{ 6806067,
    	author = "Bernabe, G. and Gonzalez, J. and Garcia, J. M. and Duato, Jose",
    	abstract = "The authors present a new compression scheme based on applying the 3D Fast Wavelet Transform, to code medical video. This video has special features such as its representation in gray scale, the small amount of interframe variations, and the quality requirements of the reconstructed images. These characteristics as well as the social impact of desired applications deserve the design and implementation of coding schemes especially oriented to exploit its features. We analyze different parameters of the codification process, such as the utilization of different wavelet functions, the number of steps this function is applied, the way the thresholds are chosen, and the selected methods in the quantization and entropy encoder. Our coder achieves a good trade-off between compression ratio and quality of the reconstructed video. These results are better than MPEG-2, without the complexity of motion compensation",
    	address = "Piscataway, NJ, USA",
    	booktitle = "Proceedings 2000 IEEE EMBS International Conference on Information Technology Applications in Biomedicine. ITAB-ITIS 2000",
    	doi = "10.1109/ITAB.2000.892391",
    	journal = "Proceedings 2000 IEEE EMBS International Conference on Information Technology Applications in Biomedicine. ITAB-ITIS 2000. Joint Meeting Third IEEE EMBS International Conference on Information Technology Applications in Biomedicine (ITAB'00). Third Worksh",
    	keywords = "data compression;medical image processing;telemedicine;video coding;wavelet transforms;",
    	note = "lossy 3D wavelet transform;high-quality compression;medical video compression;compression scheme;3D Fast Wavelet Transform;medical video coding;interframe variations;quality requirements;reconstructed images;social impact;coding schemes;codification process;wavelet functions;quantization;entropy encoder;compression ratio;reconstructed video;MPEG-2;motion compensation;",
    	pages = "226--231",
    	title = "A new lossy {3-D} wavelet transform for high-quality compression of medical video",
    	url = "http://dx.doi.org/10.1109/ITAB.2000.892391",
    	year = 2000
    }
    
  5. JC Sancho, Antonio Robles and Jose Duato. A new methodology to compute deadlock-free routing tables for irregular networks. In B Falsafi and M Lauria (eds.). NETWORK-BASED PARALLEL COMPUTING, PROCEEDINGS - COMMUNICATION, ARCHITECTURE, AND APPLICATIONS 1797. 2000, 45-60. BibTeX

    @inproceedings{ isi:000171691200004,
    	author = "Sancho, J. C. and Robles, Antonio and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are being considered as a cost-effective alternative to parallel computers. Many NOWs are arranged as a switch-based network with irregular topology, which makes routing and deadlock avoidance quite complicated. Current proposals use the up{*}/down{*} routing algorithm to remove cyclic dependencies between channels and avoid deadlock. However, routing is considerably restricted and most messages must follow non-minimal paths, increasing latency and wasting resources. In this paper, we propose a new methodology to compute deadlock-free routing tables for NOWs. The methodology tries to minimize the limitations of the current proposals in order to improve network performance. It is based on generating an underlying acyclic connected graph from the network graph and assigning a sequence number to each switch, which is used to remove cyclic dependencies. Evaluation results show that the routing algorithm based on the new methodology increases throughput by a factor of up to 2 in large networks, also reducing latency significantly.",
    	booktitle = "Network-Based Parallel Computing, Proceedings - Communication, Architecture, and Applications",
    	editor = "Falsafi, B. and Lauria, M.",
    	isbn = "3540678794",
    	issn = "0302-9743",
    	note = "4th International Workshop on Communication, Architecture, and Applications for Network-Based Parallel Computing (CANPC 2000), Toulouse, France, Jan 08, 2000",
    	pages = "45--60",
    	series = "Lecture Notes in Computer Science",
    	title = "A new methodology to compute deadlock-free routing tables for irregular networks",
    	volume = 1797,
    	year = 2000
    }
    
  6. J C Sancho, Antonio Robles and Jose Duato. A new methodology to compute deadlock-free routing tables for irregular networks. 2000, 45 - 60. BibTeX

    @inproceedings{ 6826449,
    	author = "Sancho, J. C. and Robles, Antonio and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are being considered as a cost-effective alternative to parallel computers. Many NOWs are arranged as a switch-based network with irregular topology, which makes routing and deadlock avoidance quite complicated. Current proposals use the up*/down* routing algorithm to remove cyclic dependencies between channels and avoid deadlock. However, routing is considerably restricted and most messages must follow non-minimal paths, increasing latency and wasting resources. In this paper, we propose a new methodology to compute deadlock-free routing tables for NOWs. The methodology tries to minimize the limitations of the current proposals in order to improve network performance. It is based on generating an underlying acyclic connected graph from the network graph and assigning a sequence number to each switch, which is used to remove cyclic dependencies. Evaluation results show that the routing algorithm based on the new methodology increases throughput by a factor of up to 2 in large networks, also reducing latency significantly",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. 4th International Workshop, CANPC 2000. Proceedings",
    	keywords = "performance evaluation;workstation clusters;",
    	note = "deadlock-free routing tables;irregular networks;networks of workstations;switch-based network;irregular topology;routing;deadlock avoidance;cyclic dependencies;latency;network performance;acyclic connected graph;network graph;",
    	pages = "45--60",
    	series = "Lecture Notes in Computer Science",
    	title = "A new methodology to compute deadlock-free routing tables for irregular networks",
    	volume = 1797,
    	year = 2000
    }
    
  7. Elvira Baydal, Pedro Lopez and Jose Duato. A simple and efficient mechanism to prevent saturation in wormhole networks. In Parallel and Distributed Processing Symposium, 2000. IPDPS 2000. Proceedings. 14th International. 2000, 617 -622. URL, DOI BibTeX

    @inproceedings{ 846043,
    	author = "Baydal, Elvira and Lopez, Pedro and Duato, Jose",
    	abstract = "Both deadlock avoidance and recovery techniques suffer from severe performance degradation when the network is close to or beyond saturation. This performance degradation appears because messages block in the network faster than they are drained by the escape paths in the deadlock avoidance strategies or the deadlock recovery mechanism. Many parallel applications produce bursty traffic that may saturate the network during some intervals, significantly increasing execution time. Therefore, the use of techniques that prevent network saturation are of crucial importance. Although several mechanisms have been proposed in the literature to reach this goal, some of them introduce some penalty when the network is not fully saturated, require complex hardware to be implemented or do not behave well under all network load conditions. In this paper we propose a new mechanism to avoid network saturation that overcomes these drawbacks",
    	booktitle = "Parallel and Distributed Processing Symposium, 2000. IPDPS 2000. Proceedings. 14th International",
    	doi = "10.1109/IPDPS.2000.846043",
    	keywords = "deadlock avoidance;deadlock recovery;network saturation;performance degradation;wormhole networks;computer networks;concurrency control;multiprocessor interconnection networks;",
    	pages = "617--622",
    	title = "A simple and efficient mechanism to prevent saturation in wormhole networks",
    	url = "http://dx.doi.org/10.1109/IPDPS.2000.846043",
    	year = 2000
    }
    
  8. Elvira Baydal, Pedro Lopez and Jose Duato. A simple and efficient mechanism to prevent saturation in wormhole networks. 2000, 617 - 22. URL BibTeX

    @inproceedings{ 6590366,
    	author = "Baydal, Elvira and Lopez, Pedro and Duato, Jose",
    	abstract = "Both deadlock avoidance and recovery techniques suffer from severe performance degradation when the network is close to or beyond saturation. This performance degradation appears because messages block in the network faster than they are drained by the escape paths in the deadlock avoidance strategies or the deadlock recovery mechanism. Many parallel applications produce bursty traffic that may saturate the network during some intervals, significantly increasing execution time. Therefore, the use of techniques that prevent network saturation are of crucial importance. Although several mechanisms have been proposed in the literature to reach this goal, some of them introduce some penalty when the network is not fully saturated, require complex hardware to be implemented or do not behave well under all network load conditions. In this paper we propose a new mechanism to avoid network saturation that overcomes these drawbacks",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 14th International Parallel and Distributed Processing Symposium. IPDPS 2000",
    	doi = "10.1109/IPDPS.2000.846043",
    	keywords = "computer networks;concurrency control;multiprocessor interconnection networks;",
    	note = "wormhole networks;deadlock avoidance;performance degradation;deadlock recovery;network saturation;",
    	pages = "617--622",
    	title = "A simple and efficient mechanism to prevent saturation in wormhole networks",
    	url = "http://dx.doi.org/10.1109/IPDPS.2000.846043",
    	year = 2000
    }
    
  9. Rosa Alcover, Vicente Chirivella and Jose Duato. An accurate analysis of reliability parameters in meshes with fault-tolerant adaptive routing. In Parallel Architectures, Algorithms and Networks, 2000. I-SPAN 2000. Proceedings. International Symposium on. December 2000, 88 - 93. URL, DOI BibTeX

    @inproceedings{ 6832471,
    	author = "Alcover, Rosa and Chirivella, Vicente and Duato, Jose",
    	abstract = "The traditional approach to study fault-tolerance in multicomputer interconnection networks consists of determining the worst possible combination of faulty components that causes a network failure, and then assuming that this will occur. But the worst possible combination does not always occur, and the routing algorithm allows the network to work in the presence of a greater number of failures. The network reliability parameters computed according to the traditional approach will be under-estimated. In this paper we use a new methodology to compute accurately the reliability and availability functions. The reliability parameters have been computed for a network with mesh topology, taking into account size, routing algorithm, failure and repair rates of the network channels and coverage",
    	booktitle = "Parallel Architectures, Algorithms and Networks, 2000. I-SPAN 2000. Proceedings. International Symposium on",
    	doi = "10.1109/ISPAN.2000.900267",
    	isbn = "0-7695-0936-3",
    	journal = "Proceedings International Symposium on Parallel Architectures, Algorithms and Networks. I-SPAN 2000",
    	keywords = "fault tolerant computing;multiprocessor interconnection networks;network routing;",
    	month = dec,
    	note = "reliability parameters;meshes;fault-tolerant adaptive routing;multicomputer interconnection networks;faulty components;network failure;routing algorithm;network reliability parameters;mesh topology;network channels;",
    	pages = "88--93",
    	title = "An accurate analysis of reliability parameters in meshes with fault-tolerant adaptive routing",
    	url = "http://dx.doi.org/10.1109/ISPAN.2000.900267",
    	year = 2000
    }
    
  10. M P Malumbres and Jose Duato. An efficient implementation of tree-based multicast routing for distributed shared-memory multiprocessors. Journal of Systems Architecture 46(11):1019 - 32, 2000. URL BibTeX

    @article{ 6711597,
    	author = "Malumbres, M. P. and Duato, Jose",
    	abstract = "This paper presents an efficient routing and flow control mechanism to implement multidestination message passing in wormhole networks. The mechanism is a variation of tree-based multicast with pruning to recover from deadlocks and it is well suited for distributed shared-memory multiprocessors (DSMs) with hardware cache coherence. It does not require any preprocessing of multicast messages reducing notably the software overhead required to send a multicast message. Also, it allows messages to use any deadlock-free routing function. The new scheme has been evaluated by simulation using synthetic loads. It achieves multicast latency reductions of 30\% on average. Also it was compared with other multicast mechanisms proving its benefits. Finally, it can be easily implemented in hardware with minimal changes to existing unicast wormhole routers",
    	address = "Netherlands",
    	doi = "10.1016/S1383-7621(00)00007-2",
    	issn = "1383-7621",
    	journal = "Journal of Systems Architecture",
    	keywords = "distributed shared memory systems;message passing;multiprocessor interconnection networks;network routing;",
    	note = "tree-based multicast routing;distributed shared-memory multiprocessors;flow control mechanism;multidestination message passing;wormhole networks;tree-based multicast;deadlocks;hardware cache coherence;multicast messages;software overhead;deadlock-free routing function;synthetic loads;multicast latency reductions;unicast wormhole routers;",
    	number = 11,
    	pages = "1019--1032",
    	title = "An efficient implementation of tree-based multicast routing for distributed shared-memory multiprocessors",
    	url = "http://dx.doi.org/10.1016/S1383-7621(00)00007-2",
    	volume = 46,
    	year = 2000
    }
    
  11. D Buntinas, D K Panda, Jose Duato and P Sadayappan. Broadcast/multicast over Myrinet using NIC-assisted multidestination messages. 2000, 115 - 29. BibTeX

    @inproceedings{ 6826454,
    	author = "Buntinas, D. and Panda, D. K. and Duato, Jose and Sadayappan, P.",
    	abstract = "Broadcasting and multicasting are common operations in parallel and distributed programs. Some modern Network Interface Cards (NICs) have programmable processors which can be used to provide support for these operations. However these processors are 5-15 times slower than the host processor. In this paper we propose a design and an implementation of a multi-send primitive to support efficient broadcast/multicast that requires minimal assistance from the NIC. Our scheme is designed with the idea that as much processing as possible should be done by the host processor. This gives us more flexibility with, for example, creating multicast trees which would be optimal for a particular message size, or choosing a multicast tree dynamically based on requirements of bandwidth versus latency for a particular message. We have designed a multi-send primitive and implemented it as an addition to Fast-Messages (FM) 2.1 running over a Myrinet network. The proposed scheme does less processing at the NIC. The impact of adding such NIC-assisted multicast operation to a run-time system is also very small, less than 500 ns for non-multi-send packets. To fully utilize the benefits of this primitive, we propose a method for constructing an optimal multicast tree using the new primitive. We have evaluated this scheme and obtained a speedup factor of up to 1.85 for multicasting 16 K messages with 16 nodes",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. 4th International Workshop, CANPC 2000. Proceedings",
    	keywords = "computer networks;multicast communication;network interfaces;",
    	note = "Myrinet;multidestination messages;Network Interface Cards;multi-send primitive;multicast tree;speedup factor;",
    	pages = "115--129",
    	series = "Lecture Notes in Computer Science",
    	title = "Broadcast/multicast over {Myrinet} using {NIC}-assisted multidestination messages",
    	volume = 1797,
    	year = 2000
    }
    
  12. V Yarmolenko, Jose Duato, D K Panda and P Sadayappan. Characterization and enhancement of dynamic mapping heuristics for heterogeneous systems. 2000, 437 - 44. URL BibTeX

    @inproceedings{ 6728039,
    	author = "Yarmolenko, V. and Duato, Jose and Panda, D. K. and Sadayappan, P.",
    	abstract = "Clusters of heterogeneous PCs/workstations have become attractive systems for executing a set of computationally intensive independent tasks. This paper focuses on scheduling schemes in a dynamic context - i.e. where scheduling decisions are made periodically as jobs arrive, in contrast to static scheduling where scheduling is performed after all jobs have been submitted. The paper characterizes different scheduling schemes with respect to varying arrival rates and burstiness in the job arrival rate. Using the insights gained by the characterization, a set of approaches are proposed to improve the previously developed strategies with respect to turnaround time. Simulation results indicate improvements of up to 40\% in turnaround time by using the proposed enhancements",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 2000. International Workshop on Parallel Processing",
    	doi = "10.1109/ICPPW.2000.869149",
    	keywords = "performance evaluation;processor scheduling;queueing theory;workstation clusters;",
    	note = "dynamic mapping heuristics;heterogeneous PC clusters;heterogeneous workstation clusters;computationally intensive independent tasks;task scheduling schemes;periodic scheduling decisions;bursty job arrival rate;turnaround time;simulation;performance evaluation;",
    	pages = "437--444",
    	title = "Characterization and enhancement of dynamic mapping heuristics for heterogeneous systems",
    	url = "http://dx.doi.org/10.1109/ICPPW.2000.869149",
    	year = 2000
    }
    
  13. P Holenarsipur, V Yarmolenko, Jose Duato, D K Panda and P Sadayappan. Characterization and enhancement of static mapping heuristics for heterogeneous systems. 2000, 37 - 48. BibTeX

    @inproceedings{ 6984097,
    	author = "Holenarsipur, P. and Yarmolenko, V. and Duato, Jose and Panda, D. K. and Sadayappan, P.",
    	abstract = "Heterogeneous computing environments have become attractive platforms to schedule computationally intensive jobs. We consider the problem of mapping independent tasks onto machines in a heterogeneous environment where expected execution time of each task on each machine is known. Although this problem has been much studied in the past, we derive new insights into the effectiveness of different mapping heuristics by use of two metrics-efficacy (E) and utilization (U). Whereas there is no consistent rank ordering of the various previously proposed mapping heuristics on the basis of total task completion time, we find a very consistent rank ordering of the mapping schemes with respect to the new metrics. Minimization of total completion time requires maximization of the product E$\times$U. Using the insights provided by the metrics, we develop a new matching heuristic that produces high-quality mappings using much less time than the most effective previously proposed schemes",
    	address = "Berlin, Germany",
    	booktitle = "High Performance Computing - HiPC 2000. 7th International Conference. Proceedings",
    	keywords = "performance evaluation;processor scheduling;resource allocation;workstation clusters;",
    	note = "static mapping heuristics;heterogeneous systems;heterogeneous computing;cluster computing;scheduling;task assignment;mapping heuristics;performance evaluation;",
    	pages = "37--48",
    	series = "Lecture Notes in Computer Science",
    	title = "Characterization and enhancement of static mapping heuristics for heterogeneous systems",
    	volume = 1970,
    	year = 2000
    }
    
  14. J M Orduna, Vicente Arnau and Jose Duato. Characterization of communications between processes in message-passing applications. 2000, 91 - 8. URL BibTeX

    @inproceedings{ 6805977,
    	author = "Orduna, J. M. and Arnau, Vicente and Duato, Jose",
    	abstract = "Many research activities have focused on the problem of task scheduling in heterogeneous systems from the computational point of view. However, an ideal scheduling strategy would also take into account the communication requirements of the applications and the communication bandwidth available in the network. One of the major problems to be solved in the development of this scheduling strategy is precisely the measurement of the communication requirements for each application. We propose a clustering-based method to characterize the communications between processes generated by message-passing applications. This technique provides a model consisting of several partitions of the processes generated by the application. Also, we propose a criterion to measure the quality of the obtained partitions. This approach can be used when a given application is repeatedly executed with different input data. Results show that the proposed method can provide a partition with the highest ratio between the intracluster and the intercluster required communication bandwidth. This partition can be used to map groups of processes to processors in the heterogeneous system",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings IEEE International Conference on Cluster Computing. CLUSTER 2000",
    	doi = "10.1109/CLUSTR.2000.889009",
    	keywords = "communication complexity;message passing;parallel programming;processor scheduling;workstation clusters;",
    	note = "interprocess communication;message passing applications;task scheduling;heterogeneous systems;clustering-based method;intracluster communication bandwidth;intercluster communication bandwidth;",
    	pages = "91--98",
    	title = "Characterization of communications between processes in message-passing applications",
    	url = "http://dx.doi.org/10.1109/CLUSTR.2000.889009",
    	year = 2000
    }
    
  15. Jose Flich, Pedro Lopez, M P Malumbres, Jose Duato and T Rokicki. Combining in-transit buffers with optimized routing schemes to boost the performance of networks with source routing. 2000, 300 - 9. BibTeX

    @inproceedings{ 6977557,
    	author = "Flich, Jose and Lopez, Pedro and Malumbres, M. P. and Duato, Jose and Rokicki, T.",
    	abstract = "In previous papers we proposed the ITB mechanism to improve the performance of up*/down* routing in irregular networks with source routing. With this mechanism, both minimal routing and a better use of network links are guaranteed, resulting on an overall network performance improvement. In this paper, we show that the ITB mechanism can be used with any source routing scheme in the NOW environment. In particular, we apply ITB to DFS and Smart routing algorithms, which provide better routes than up*/down* routing. Results show that ITB strongly improves DFS (by 63\%, for 64-switch networks) and Smart throughput (23\%, for 32-switch networks)",
    	address = "Berlin, Germany",
    	booktitle = "High Performance Computing. Third International Symposium, ISHPC 2000. Proceedings",
    	keywords = "buffer storage;network routing;performance evaluation;workstation clusters;",
    	note = "in-transit buffers;optimized routing schemes;network performance;source routing;ITB mechanism;NOW;Smart routing algorithm;DFS routing algorithm;",
    	pages = "300--309",
    	series = "Lecture Notes in Computer Science",
    	title = "Combining in-transit buffers with optimized routing schemes to boost the performance of networks with source routing",
    	volume = 1940,
    	year = 2000
    }
    
  16. Xavier Molero, Federico Silla, F Rodriguez and Vicente Santonja. Design and implementation of a simulation tool for networks of workstations. 2000, 154 - 9. BibTeX

    @inproceedings{ 6804250,
    	author = "Molero, Xavier and Silla, Federico and Rodriguez, F. and Santonja, Vicente",
    	abstract = "Networks of workstations (NOWs) are rapidly emerging as a cost-effective alternative to parallel computers. In order to evaluate their performance, it is necessary to use adequate tools. Performance evaluation may be based on several types of modeling techniques (analytical modeling, simulation modeling, prototyping). Each of them differs in their scope and applicability. However, the simulation modeling technique offers more freedom and flexibility than the other methods. Thus, when evaluating the performance of NOWs, simulation modeling is often used because it provides a convenient and reliable way for such studies. We have implemented a very flexible and easy to use NOW simulator. The authors present a brief description of the employed simulation language, the internal design of the implemented tool, along with fragments of the code for simulating the behavior of the modeled system elements",
    	address = "San Diego, CA, USA",
    	booktitle = "Proceedings of the High Performance Computing Symposium - HPC 2000",
    	keywords = "discrete event simulation;performance evaluation;virtual machines;workstation clusters;",
    	note = "simulation tool;networks of workstations;NOWs;modeling techniques;performance evaluation;analytical modeling;simulation modeling;prototyping;simulation language;model design;discrete simulation;hierarchical modeling;arrival generation;",
    	pages = "154--159",
    	title = "Design and implementation of a simulation tool for networks of workstations",
    	year = 2000
    }
    
  17. Julio Sahuquillo and A Pont. Designing competitive coherence protocols taking advantage of reuse information. In Euromicro Conference, 2000. Proceedings of the 26th 1. 2000, 378 -385. URL, DOI BibTeX

    @inproceedings{ 874656,
    	author = "Sahuquillo, Julio and Pont, A.",
    	abstract = "The filter data cache scheme introduces two independent L1 data caches with different organizations placed in parallel. In this scheme, each cache block has a small counter attached for storing information needed for management-called reuse information. The Filter Data Cache micro-architecture offers lower miss rates and better speedups than conventional organizations; as well as saving die area. The reuse information included is directly responsible for improving the overall cache hit-ratio and reducing bus utilization, and this makes it relevant for multiprocessor systems. In this paper, we show how the reuse information of the Filter Data Cache scheme can also be used to design competitive coherence protocols tailored to that scheme. These offer better performance results than traditional write-invalidate and write-update policies",
    	booktitle = "Euromicro Conference, 2000. Proceedings of the 26th",
    	doi = "10.1109/EURMIC.2000.874656",
    	isbn = "0-7695-0780-8",
    	keywords = "cache hit-ratio;competitive coherence protocols;filter data cache scheme;microarchitecture;multiprocessor systems;reuse information;write-invalidate policies;write-update policies;multiprocessing systems;performance evaluation;protocols;software reusabili",
    	pages = "378--385",
    	publisher = "IEEE Computer Society",
    	title = "Designing competitive coherence protocols taking advantage of reuse information",
    	url = "http://dx.doi.org/10.1109/EURMIC.2000.874656",
    	volume = 1,
    	year = 2000
    }
    
  18. A Bermudez, F J Alfaro, R Casado, Jose Duato, F J Quiles and J L Sanchez. Extending dynamic reconfiguration to NOWs with adaptive routing. 2000, 73 - 83. BibTeX

    @conference{ 6826451,
    	author = "Bermudez, A. and Alfaro, F. J. and Casado, R. and Duato, Jose and Quiles, F. J. and Sanchez, J. L.",
    	abstract = "Many distributed applications executed on networks of workstations (NOWs) require the interconnection network to provide some quality of service (QoS) support. These networks must be able to support topology changes (due to component failures, hot replacement, hot expansion, etc.) without stopping traffic, in order to satisfy QoS requirements. Traditional network reconfiguration methods do not take this into account, causing a serious performance degradation while the network is being reconfigured. Previously, we proposed a new dynamic network reconfiguration protocol, called Partial Progressive Reconfiguration. It significantly reduces the negative effects produced by traditional methods. For this reason, it is especially suitable for applications requiring QoS. This reconfiguration protocol requires that messages are routed using up*/down* routing. In this paper, we extend this dynamic reconfiguration technique to support adaptive routing, based on the design methodology for adaptive algorithms proposed previously. We also present performance evaluation results, clearly showing the benefits of using dynamic reconfiguration combined with adaptive routing",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. 4th International Workshop, CANPC 2000. Proceedings",
    	keywords = "performance evaluation;quality of service;workstation clusters;",
    	note = "dynamic reconfiguration;adaptive routing;network of workstation;quality of service;topology changes;network reconfiguration methods;dynamic network reconfiguration protocol;Partial Progressive Reconfiguration;reconfiguration protocol;performance evaluation;",
    	pages = "73--83",
    	series = "Lecture Notes in Computer Science",
    	title = "{E}xtending dynamic reconfiguration to {NOW}s with adaptive routing",
    	volume = 1797,
    	year = 2000
    }
    
  19. O Lysne and Jose Duato. Fast dynamic reconfiguration in irregular networks. 2000, 449 - 58. URL BibTeX

    @conference{ 6742430,
    	author = "Lysne, O. and Duato, Jose",
    	abstract = "Exploitation of the wiring flexibility in Networks of Workstations demands configuration methods that can handle dynamic changes in irregular topologies. During reconfiguration of a network based on virtual cut-through or wormhole switching, however deadlocks in the transition phase between the old and the new routing function must be avoided. The avoidance of such deadlocks will in general make the performance of the network suffer during reconfiguration. Keeping reconfiguration time as short as possible, and leaving as much as possible of the network untouched is therefore of importance. We propose a method for dynamic reconfiguration of networks using up*/down* routing that aims at reducing the consequences of reconfiguration. This is done by identifying a restricted part of the network, the skyline, as the only part where a full reconfiguration is necessary. This means that most of the network does not need to take part in the reconfiguration at all (other than adding entries for new nodes, and removing entries for removed nodes). Experiments show that for the most frequent configuration changes the skyline will be empty in 85-95\% of the cases, leaving the whole of the network operational through the entire reconfiguration. For the most dramatic changes in topology-the addition of a link connecting two previously disjoint networks-an average of 90\% of the links can start using the new routing function immediately for some topologies. Our approach is in principle orthogonal to other approaches, thus existing methods for dynamic reconfiguration can be applied in the reconfiguration of the skyline",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 2000 International Conference on Parallel Processing",
    	doi = "10.1109/ICPP.2000.876161",
    	keywords = "concurrency control;multiprocessor interconnection networks;reconfigurable architectures;workstation clusters;",
    	note = "dynamic reconfiguration;irregular networks;wiring flexibility;Networks of Workstations;deadlocks;reconfiguration;routing function;",
    	pages = "449--458",
    	title = "{F}ast dynamic reconfiguration in irregular networks",
    	url = "http://dx.doi.org/10.1109/ICPP.2000.876161",
    	year = 2000
    }
    
  20. Federico Silla and Jose Duato. High-performance routing in networks of workstations with irregular topology. IEEE Transactions on Parallel and Distributed Systems 11(7):699 - 719, 2000. URL BibTeX

    @article{ 2000465351371,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations are rapidly emerging as a cost-effective alternative to parallel computers. Switch-based interconnects with irregular topology allow the wiring flexibility, scalability, and incremental expansion capability required in this environment. However, the irregularity also makes routing and deadlock avoidance on such systems quite complicated. In current proposals, many messages are routed following nonminimal paths, increasing latency and wasting resources. In this paper, we propose two general methodologies for the design of adaptive routing algorithms for networks with irregular topology. Routing algorithms designed according to these methodologies allow messages to follow minimal paths in most cases, reducing message latency and increasing network throughput. As an example of application, we propose two adaptive routing algorithms for AN1 (previously known as Autonet). They can be implemented either by duplicating physical channels or by splitting each physical channel into two virtual channels. In the former case, the implementation does not require a new switch design. It only requires changing the routing tables and adding links in parallel with existing ones, taking advantage of spare switch ports. In the latter case, a new switch design is required, but the network topology is not changed. Evaluation results for several different topologies and message distributions show that the new routing algorithms are able to increase throughput for random traffic by a factor of up to 4 with respect to the original up*/down* algorithm, also reducing latency significantly. For other message distributions, throughput is increased more than seven times. We also show that most of the improvement comes from the use of minimal routing.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.877816",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Parallel processing systems",
    	keywords = "Adaptive algorithms;Communication channels;Computer workstations;Congestion control;Interconnection networks;Response time;Telecommunication traffic;Topology;",
    	note = "Adaptive routing algorithms;Wormhole switching;",
    	number = 7,
    	pages = "699--719",
    	title = "{H}igh-performance routing in networks of workstations with irregular topology",
    	url = "http://dx.doi.org/10.1109/71.877816",
    	volume = 11,
    	year = 2000
    }
    
  21. JC Sancho, Antonio Robles and Jose Duato. Improving minimal adaptive routing in networks with irregular topology. In G Chaudhry and E Sha (eds.). PARALLEL AND DISTRIBUTED COMPUTING SYSTEMS. 2000, 314-319. BibTeX

    @conference{ isi:000179773600050,
    	author = "Sancho, J. C. and Robles, Antonio and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are being considered as a cost-effective alternative to parallel computers. Many NOWs are arranged as a switch-based network with irregular topology, which makes routing and deadlock avoidance quite complicated. Several current proposals, like up{*}/down{*} routing, avoid deadlock by removing cyclic dependencies between channels. A more efficient approach consists of allowing cyclic dependencies between channels while providing some escape paths to avoid deadlock. Minimal adaptive routing (MA) is a distributed adaptive routing algorithm that is able to use all the minimal paths and guarantees deadlock freedom by using up{*}/down{*} routing to route messages through the escape paths. Recently, a simple and effective methodology to compute up{*}/down{*} routing tables has been proposed by us. The resulting up{*}/down{*} routing scheme makes use of a different link direction assignment to compute routing tables. Assignment of link direction is based on generating an underlying acyclic connected graph from the network graph. In this paper, we analyze the influence of using the new methodology to compute up{*}/down{*} routing tables on the performance of the minimal adaptive routing algorithm. Evaluation results show that when the methodology to compute up{*}/down{*} routing tables is combined with minimal adaptive routing, an improvement in throughput of up to 40\% is achieved, also reducing latency.",
    	booktitle = "Parallel and Distributed Computing Systems",
    	editor = "Chaudhry, G and Sha, E",
    	isbn = "188084334X",
    	note = "13th International Conference on Parallel and Distributed Computing Systems, LAS VEGAS, NV, AUG 08-10, 2000",
    	pages = "314--319",
    	title = "{I}mproving minimal adaptive routing in networks with irregular topology",
    	year = 2000
    }
    
  22. Jose Flich, Pedro Lopez, M P Malumbres and Jose Duato. Improving routing performance in Myrinet networks. In Parallel and Distributed Processing Symposium, 2000. IPDPS 2000. Proceedings. 14th International. 2000, 27-32. URL, DOI BibTeX

    @conference{ 845961,
    	author = "Flich, Jose and Lopez, Pedro and Malumbres, M. P. and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect processors using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. In some of these networks, packets are delivered using source routing. Due to the irregular topology, the routing scheme is often non-minimal. In this paper we analyze the routing scheme used in Myrinet networks in order to improve its performance. We propose new routing algorithms that balance the utilization of the available routes and always use minimal paths. We show through simulation that the current routing schemes used in Myrinet networks can be improved by modifying only the routing software without increasing the software overhead significantly. The overall throughput can be doubled without modifying the network hardware",
    	booktitle = "Parallel and Distributed Processing Symposium, 2000. IPDPS 2000. Proceedings. 14th International",
    	doi = "10.1109/IPDPS.2000.845961",
    	keywords = "Myrinet networks;NOWs;networks of workstations;routing performance;routing scheme;network routing;workstation clusters;",
    	pages = "27--32",
    	title = "{I}mproving routing performance in {M}yrinet networks",
    	url = "http://dx.doi.org/10.1109/IPDPS.2000.845961",
    	year = 2000
    }
    
  23. Jose Flich, M P Malumbres, Pedro Lopez and Jose Duato. Improving routing performance in Myrinet networks. 2000, 27 - 32. URL BibTeX

    @conference{ 6590291,
    	author = "Flich, Jose and Malumbres, M. P. and Lopez, Pedro and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect processors using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. In some of these networks, packets are delivered using source routing. Due to the irregular topology, the routing scheme is often non-minimal. In this paper we analyze the routing scheme used in Myrinet networks in order to improve its performance. We propose new routing algorithms that balance the utilization of the available routes and always use minimal paths. We show through simulation that the current routing schemes used in Myrinet networks can be improved by modifying only the routing software without increasing the software overhead significantly. The overall throughput can be doubled without modifying the network hardware",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 14th International Parallel and Distributed Processing Symposium. IPDPS 2000",
    	doi = "10.1109/IPDPS.2000.845961",
    	keywords = "network routing;workstation clusters;",
    	note = "routing performance;Myrinet networks;NOWs;networks of workstations;routing scheme;",
    	pages = "27--32",
    	title = "{I}mproving routing performance in {M}yrinet networks",
    	url = "http://dx.doi.org/10.1109/IPDPS.2000.845961",
    	year = 2000
    }
    
  24. Jose Flich, Pedro Lopez, M P Malumbres and Jose Duato. Improving the performance of regular networks with source routing. In Parallel Processing, 2000. Proceedings. 2000 International Conference on. 2000, 353-361. URL, DOI BibTeX

    @conference{ 876151,
    	author = "Flich, Jose and Lopez, Pedro and Malumbres, M. P. and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. In these machines, the network connects processors using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. Also, when performance is the primary concern, these network products are being used to build large commodity clusters with regular topologies. In previous papers, we have proposed the in-transit buffer mechanism to improve network performance, applying it to NOWs with irregular topology and source routing. This mechanism allows the use of minimal paths among all hosts, breaking cyclic dependencies between channels by storing and later re-injecting packets at some intermediate hosts. In this paper we apply the in-transit buffer mechanism to regular networks with source routing in order to improve their performance. Also, two path selection policies are evaluated. The first one will always choose the same minimal path from source to destination, whereas the second one will choose from different alternative minimal paths in a round-robin fashion. The evaluation results show that the overall network throughput can be doubled for large networks",
    	booktitle = "Parallel Processing, 2000. Proceedings. 2000 International Conference on",
    	doi = "10.1109/ICPP.2000.876151",
    	keywords = "NOWs;networks of workstations;parallel computers;path selection policies;regular networks;round-robin;source routing;buffer storage;network routing;performance evaluation;workstation clusters;",
    	pages = "353--361",
    	title = "{I}mproving the performance of regular networks with source routing",
    	url = "http://dx.doi.org/10.1109/ICPP.2000.876151",
    	year = 2000
    }
    
  25. Jose Flich, Pedro Lopez, M P Malumbres and Jose Duato. Improving the performance of regular networks with source routing. 2000, 353 - 61. URL BibTeX

    @conference{ 6742420,
    	author = "Flich, Jose and Lopez, Pedro and Malumbres, M. P. and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. In these machines, the network connects processors using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. Also, when performance is the primary concern, these network products are being used to build large commodity clusters with regular topologies. In previous papers, we have proposed the in-transit buffer mechanism to improve network performance, applying it to NOWs with irregular topology and source routing. This mechanism allows the use of minimal paths among all hosts, breaking cyclic dependencies between channels by storing and later re-injecting packets at some intermediate hosts. In this paper we apply the in-transit buffer mechanism to regular networks with source routing in order to improve their performance. Also, two path selection policies are evaluated. The first one will always choose the same minimal path from source to destination, whereas the second one will choose from different alternative minimal paths in a round-robin fashion. The evaluation results show that the overall network throughput can be doubled for large networks",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 2000 International Conference on Parallel Processing",
    	doi = "10.1109/ICPP.2000.876151",
    	keywords = "buffer storage;network routing;performance evaluation;workstation clusters;",
    	note = "regular networks;source routing;networks of workstations;NOWs;parallel computers;path selection policies;round-robin;",
    	pages = "353--361",
    	title = "{I}mproving the performance of regular networks with source routing",
    	url = "http://dx.doi.org/10.1109/ICPP.2000.876151",
    	year = 2000
    }
    
  26. A Perles, Xavier Molero, A Marti, Vicente Santonja and J J Serrano. Improving the simulation of storage area networks (SAN) using concurrent execution. 2000, 149 - 53. BibTeX

    @conference{ 6880283,
    	author = "Perles, A. and Molero, Xavier and Marti, A. and Santonja, Vicente and Serrano, J. J.",
    	abstract = "A storage area network is a high-speed subnet that establishes a direct connection between heterogeneous storage resources and servers. Up to now, the work done in our department on the performance evaluation of these systems has been carried out using traditional simulation techniques. However, the SAN simulator designed by our researchers needed a lot of computational time to obtain statistically correct. In this work we show how we have improved the execution time of our SAN simulator using a concurrent simulation approach. This approximation basically consists of executing in parallel variable-sized independent replications of the simulation model. The obtained results encourage us to continue working on concurrent simulation development",
    	address = "San Diego, CA, USA",
    	booktitle = "Simulation in Industry'2000. 12th European Simulation Symposium 2000. ESS'2000",
    	keywords = "local area networks;network servers;performance evaluation;storage management;virtual machines;",
    	note = "storage area networks;concurrent execution;high-speed subnet;heterogeneous storage resources;servers;performance evaluation;execution time;concurrent simulation approach;parallel variable-sized independent replications;",
    	pages = "149--153",
    	title = "{I}mproving the simulation of storage area networks ({SAN}) using concurrent execution",
    	year = 2000
    }
    
  27. J C Sancho and Antonio Robles. Improving the up*/down* routing scheme for networks of workstations. 2000, 882 - 9. BibTeX

    @conference{ 6905305,
    	author = "Sancho, J. C. and Robles, Antonio",
    	abstract = "Networks of workstations (NOWs) are being considered as a cost-effective alternative to parallel computers. Current proposals use the up*/down* routing algorithm to remove cyclic dependencies between channels and avoid deadlock. A simple and effective methodology to compute up*/down* routing tables has been proposed by Sancho et al. (2000). The resulting up*/down* routing scheme makes use of a different link direction assignment to compute routing tables. Assignment of link direction is based on generating an underlying acyclic connected graph from the network graph. In this paper, we propose and evaluate new heuristic rules to compute the underlying graph. Moreover, we propose a frame balancing algorithm to obtain more efficient up*/down* routing tables when source routing is used. Evaluation results show that the routing algorithm based on the new methodology increases throughput by a factor of up to 2.8 in large networks, also reducing latency significantly",
    	address = "Berlin, Germany",
    	booktitle = "Euro-Par 2000 Parallel Processing. 6th International Euro-Par Conference. Proceedings",
    	keywords = "network routing;network topology;optimisation;parallel machines;workstation clusters;",
    	note = "up down routing;workstation networks;parallel computers;link direction assignment;acyclic connected graph;heuristic rules;frame balancing;irregular topology;deadlock avoidance;spanning trees;",
    	pages = "882--889",
    	series = "Lecture Notes in Computer Science",
    	title = "{I}mproving the up*/down* routing scheme for networks of workstations",
    	volume = 1900,
    	year = 2000
    }
    
  28. JC Sancho and Antonio Robles. Improving the up*/down* routing scheme for networks of workstations. In A Bode, T Ludwig, W Karl and R Wismuller (eds.). EURO-PAR 2000 PARALLEL PROCESSING, PROCEEDINGS 1900. 2000, 882-889. BibTeX

    @conference{ isi:000189042500123,
    	author = "Sancho, J. C. and Robles, Antonio",
    	abstract = "Networks of workstations (NOWs) are being considered as a cost-effective alternative to parallel computers. Many NOWs are arranged as a switch-based network with irregular topology, which makes routing and deadlock avoidance quite complicated. Current proposals use the up{*}/down{*} routing algorithm to remove cyclic dependencies between channels and avoid deadlock. Recently, a simple and effective methodology to compute up{*}/down{*} routing tables has been proposed by us. The resulting up{*}/down{*} routing scheme makes use of a different link direction assignment to compute routing tables. Assignment of link direction is based on generating an underlying acyclic connected graph from the network graph. In this paper, we propose and evaluate new heuristic rules to compute the underlying graph. Moreover, we propose a traffic balancing algorithm to obtain more efficient up{*}/down{*} routing tables when source routing is used. Evaluation results show that the routing algorithm based on the new methodology increases throughput by a factor of up to 2.8 in large networks, also reducing latency significantly.",
    	booktitle = "Euro-Par 2000 Parallel Processing, Proceedings",
    	editor = "Bode, A and Ludwig, T and Karl, W and Wismuller, R",
    	isbn = "3-540-67956-1",
    	issn = "0302-9743",
    	note = "6th International Euro-Par 2000 Conference, MUNICH, GERMANY, AUG 29-SEP 01, 2000",
    	pages = "882--889",
    	series = "Lecture Notes in Computer Science",
    	title = "{I}mproving the up{*}/down{*} routing scheme for networks of workstations",
    	volume = 1900,
    	year = 2000
    }
    
  29. Xavier Molero, Federico Silla and Vicente Santonja. Modeling and simulation of a network of workstations with wormhole switching. Proceedings of the IEEE Annual Simulation Symposium, pages 299 - 306, 2000. BibTeX

    @conference{ 2000295194823,
    	author = "Molero, Xavier and Silla, Federico and Santonja, Vicente",
    	abstract = "Networks of workstations (NOW) are becoming a very popular alternative to parallel computers. This article presents a NOW simulator, the basic queueing models it is based on, its main internal organization, input parameters, and output performance variables. Simple examples of performance measures obtained for message fragmentation, failures in links and switches, and self-similar traffic are given.",
    	address = "Washington, DC, USA",
    	booktitle = "Proceedings of the IEEE Annual Simulation Symposium",
    	issn = "0272-4715",
    	key = "Parallel processing systems",
    	keywords = "Adaptive algorithms;Computer architecture;Computer simulation;Computer workstations;Data communication systems;Evaluation;Local area networks;Performance;Switching theory;Telecommunication traffic;",
    	note = "Failures in links and switches;Irregular topologies;Message fragmentation;Networks of workstations;Routing algorithms;Self similar traffic;Software Package CSIM;Wormhole switching;",
    	pages = "299--306",
    	title = "{M}odeling and simulation of a network of workstations with wormhole switching",
    	year = 2000
    }
    
  30. Xavier Molero, Federico Silla and Vicente Santonja. Modeling and simulation of a network of workstations with wormhole switching. 2000, 299 - 306. URL BibTeX

    @conference{ 6590026,
    	author = "Molero, Xavier and Silla, Federico and Santonja, Vicente",
    	abstract = "Networks of workstations (NOW) are becoming a very popular alternative to parallel computers for those applications with high needs of resources such as memory capacity processing power and input/output storage space. Typically, these networks connect workstations using irregular topologies, providing wiring flexibility, scalability, and incremental expansion capability required in this environment. In order to analyze and design these kind of systems it is necessary to have adequate tools. To address this problem, we have implemented a very flexible and easy to use NOW simulator. It is based on the one presented in (Silla, 1998; Silla and Duato, 1997; 1998) and it includes three more functionalities: it supports a technique for message fragmentation in packets, generates self-similar traffic, and also it can model networks with permanent faulted links or switches. We present this NOW simulator, the basic queueing models it is based on, its main internal organization, input parameters, output performance variables, and finally, we show several simple examples of performance measures obtained for, among others, message fragmentation, failures in links and switches, and self-similar traffic",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 33rd Annual Simulation Symposium (SS 2000)",
    	doi = "10.1109/SIMSYM.2000.844928",
    	keywords = "digital simulation;packet switching;performance evaluation;telecommunication computing;telecommunication network routing;telecommunication traffic;workstation clusters;",
    	note = "network of workstations;network simulation;wormhole switching;parallel computers;memory capacity;input output storage space;wiring flexibility;message fragmentation;self-similar traffic;queueing models;performance measures;",
    	pages = "299--306",
    	title = "{M}odeling and simulation of a network of workstations with wormhole switching",
    	url = "http://dx.doi.org/10.1109/SIMSYM.2000.844928",
    	year = 2000
    }
    
  31. Xavier Molero, Federico Silla, Vicente Santonja and Jose Duato. Modeling and simulation of storage area networks. 2000, 307 - 14. URL BibTeX

    @conference{ 6735495,
    	author = "Molero, Xavier and Silla, Federico and Santonja, Vicente and Duato, Jose",
    	abstract = "Storage area networks (SANs) are an emerging data communications platform which interconnects servers and storage devices (such as disks, disk arrays, and tape drives) to create a pool of storage that users can access directly. This networking approach reports benefits such as computer clustering, topological flexibility, fault tolerance, high availability, and remote management. In order to evaluate the performance of these systems it is necessary to have the adequate tools. Usually, performance evaluation may be based on analytical modeling or simulation. Each of them differs in their scope and applicability. However the simulation modeling technique offers more freedom, flexibility, and accuracy than the analytical methods. Thus, when evaluating the performance of SANs, simulation modeling should be used. In this paper the issues involved in the modeling and design of a very flexible and easy to use SAN simulator are presented. This tool is able to consider among others, both real-world I/O traces and synthetic I/O traffic, message packetization, faults in links and switches, virtual channels, different routing algorithms, etc. We describe its main internal organization, the basic modeling mechanisms the simulator is based on, the main input parameters and output performance variables. Also, the analysis of preliminary results using I/O traces is presented, showing that the storage network increases self-similarity of the traffic received by servers, latency variations are more important for control messages than for data messages, and links have a low utilization",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 8th International Symposium on Modeling, Analysis and Simulation of Computer and Telecommunication Systems (Cat. No.PR00728)",
    	doi = "10.1109/MASCOT.2000.876553",
    	keywords = "local area networks;performance evaluation;storage management;virtual machines;",
    	note = "storage area networks;modeling;simulation;data communications platform;servers;storage devices;computer clustering;topological flexibility;fault tolerance;high availability;remote management;performance evaluation;real-world I/O traces;synthetic I/O traffic;message packetization;faults;virtual channel;routing algorithms;traffic self-similarity;control messages;data messages;",
    	pages = "307--314",
    	title = "{M}odeling and simulation of storage area networks",
    	url = "http://dx.doi.org/10.1109/MASCOT.2000.876553",
    	year = 2000
    }
    
  32. J M Orduna, Vicente Arnau, A Ruiz, R Valero and Jose Duato. On the design of communication-aware task scheduling strategies for heterogeneous systems. 2000, 391 - 8. URL BibTeX

    @conference{ 6742424,
    	author = "Orduna, J. M. and Arnau, Vicente and Ruiz, A. and Valero, R. and Duato, Jose",
    	abstract = "Many research activities have focused on the problem of task scheduling in heterogeneous systems from the computational point of view. However an ideal scheduling strategy would also take into account the communication requirements of the applications and the communication bandwidth that the network can offer. In this paper, we first propose a criterion to measure the suitability of each allocation of network resources to each parallel application, according to the communication requirements. Second, we propose a scheduling technique based exclusively on this criterion that provides a near-optimal mapping of processes to processors according to the communication requirements. Evaluation results show that the use of this scheduling technique fully exploits the available network bandwidth, greatly improving network performance. Therefore, the proposed scheduling technique may be used in the design of communication-aware scheduling strategies for those situations where the communication requirements are the system performance bottleneck",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 2000 International Conference on Parallel Processing",
    	doi = "10.1109/ICPP.2000.876155",
    	keywords = "processor scheduling;resource allocation;",
    	note = "communication-aware task scheduling;heterogeneous systems;task scheduling;network resources;parallel application;scheduling technique;near-optimal mapping;",
    	pages = "391--398",
    	title = "{O}n the design of communication-aware task scheduling strategies for heterogeneous systems",
    	url = "http://dx.doi.org/10.1109/ICPP.2000.876155",
    	year = 2000
    }
    
  33. Xavier Molero, Federico Silla, Vicente Santonja and Jose Duato. On the effect of link failures in fibre channel storage area networks. 2000, 102 - 11. URL BibTeX

    @inproceedings{ 6832473,
    	author = "Molero, Xavier and Silla, Federico and Santonja, Vicente and Duato, Jose",
    	abstract = "The fast growth of data intensive applications has caused a change in the traditional storage model. The server-to-disk approach is being replaced by storage area networks (SANs), which enable storage to be externalized from servers, thus allowing storage devices to be shared among multiple servers. The prominent technology for implementing SANs is Fibre Channel, due to its suitability for storage networking. Although the probability of a link failure for individual links in a SAN is very low, this probability dramatically increases as the network size becomes larger. Moreover, there are external factors, such as accidental link disconnections, that also can affect the overall SAN reliability. Until the faulty element is replaced, the SAN is functioning in a degraded mode. In this paper we analyze by simulation the performance degradation of Fibre Channel storage area networks when failures in links occur, quantifying how much the global SAN performance is reduced during the time the system remains in the degraded state. We perform this analysis by using both synthetic and real I/O traffic. Simulation results show that performance degradation mainly depends on the routing algorithm and the switch architecture used",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings International Symposium on Parallel Architectures, Algorithms and Networks. I-SPAN 2000",
    	doi = "10.1109/ISPAN.2000.900269",
    	keywords = "optical fibre LAN;optical storage;performance evaluation;",
    	note = "link failures;fibre channel storage area networks;storage model;server-to-disk approach;multiple servers;link failure;network size;performance degradation;real I/O traffic;routing algorithm;switch architecture;",
    	pages = "102--111",
    	title = "{O}n the effect of link failures in fibre channel storage area networks",
    	url = "http://dx.doi.org/10.1109/ISPAN.2000.900269",
    	year = 2000
    }
    
  34. Juan Carlos Martinez, Federico Silla, Pedro Lopez and Jose Duato. On the influence of the selection function on the performance of networks of workstations. 2000, 292 - 9. BibTeX

    @inproceedings{ 6977556,
    	author = "Martinez, Juan Carlos and Silla, Federico and Lopez, Pedro and Duato, Jose",
    	abstract = "Previous research has pointed out the influence of adaptive routing on the performance improvement of interconnection networks for clusters of workstations. One of the design issues of adaptive routing algorithms is the selection function, which selects the output channel among all the available choices. We analyze in detail several selection functions in order to evaluate their influence on network performance. Simulation results show that network throughput may be increased up to 10%. When the network is close to saturation, improvements in latency up to 40% may be achieved",
    	address = "Berlin, Germany",
    	booktitle = "High Performance Computing. Third International Symposium, ISHPC 2000. Proceedings (Lecture Notes in Computer Science Vol.1940)",
    	keywords = "delays;multiprocessor interconnection networks;network routing;network topology;performance evaluation;workstation clusters;",
    	note = "selection function;networks of workstations;interconnection networks;workstation clusters;adaptive routing algorithms;performance evaluation;network throughput;latency;",
    	pages = "292--299",
    	title = "{O}n the influence of the selection function on the performance of networks of workstations",
    	year = 2000
    }
    
  35. F J Alfaro, A Bermudez, R Casado, Jose Duato, F J Quiles and J L Sanchez. On the performance of up*/down* routing. 2000, 61 - 72. BibTeX

    @inproceedings{ 6826450,
    	author = "Alfaro, F. J. and Bermudez, A. and Casado, R. and Duato, Jose and Quiles, F. J. and Sanchez, J. L.",
    	abstract = "Networks of Workstations (NOWs) are usually arranged as a set of interconnected switches with hosts connected to switch ports through interface cards. Several commercial interconnects for high-speed NOWs use up*/down* routing. Every time the network is powered on or the topology is changed, a configuration algorithm is executed, which provides information about the topology and generates a directed graph. Routing tables are computed from this directed graph. There are several ways to obtain the directed graph. The most frequent way is by means of algorithms based on minimum-depth spanning-trees (MDST) or propagation-order spanning-trees (POST). This paper shows that, for most networks, graphs obtained by means of these methods can be improved in order to achieve higher network performance",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. 4th International Workshop, CANPC 2000. Proceedings (Lecture Notes in Computer Science Vol.1797)",
    	keywords = "directed graphs;network routing;performance evaluation;workstation clusters;",
    	note = "performance;up*/down* routing;networks of workstations;interconnected switches;configuration algorithm;directed graph;routing tables;minimum-depth spanning-trees;propagation-order spanning-trees;",
    	pages = "61--72",
    	title = "{O}n the performance of up*/down* routing",
    	year = 2000
    }
    
  36. Federico Silla and Jose Duato. On the use of virtual channels in networks of workstations with irregular topology. IEEE Transactions on Parallel and Distributed Systems 11(8):813 - 828, 2000. URL BibTeX

    @article{ 2000515393317,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect workstations using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. Recently, we proposed two methodologies for the design of adaptive routing algorithms for networks with irregular topology, as well as fully adaptive routing algorithms for these networks. These algorithms increase throughput considerably with respect to previously existing ones, but require the use of at least two virtual channels. In this paper, we propose a very efficient flow control protocol to support virtual channels when link wires are very long and/or have different lengths. This flow control protocol relies on the use of channel pipelining and control flits. Control traffic is minimized by assigning physical bandwidth to virtual channels until the corresponding message blocks or it is completely transmitted. Simulation results show that this flow control protocol performs as efficiently as an ideal network with short wires and flit-by-flit multiplexing. The effect of additional virtual channels per physical channel has also been studied, revealing that the optimal number of virtual channels varies with network size. The use of virtual channel priorities is also analyzed. The proposed flow control protocol may increase short message latency, due to long messages monopolizing channels and hindering the progress of short messages. Therefore, we have analyzed the impact of limiting the number of flits (block size) that a virtual channel may forward once it gets the link. Simulation results show that limiting the maximum block size causes the overall network performance to decrease.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.877939",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Network protocols",
    	keywords = "Adaptive algorithms;Bandwidth;Communication channels;Computer simulation;Computer workstations;Congestion control;Multiplexing;Pipeline processing systems;Telecommunication traffic;",
    	note = "Adaptive routing algorithms;Block multiplexing;Channel pipelining;Virtual channels;Wormhole switching;",
    	number = 8,
    	pages = "813--828",
    	title = "{O}n the use of virtual channels in networks of workstations with irregular topology",
    	url = "http://dx.doi.org/10.1109/71.877939",
    	volume = 11,
    	year = 2000
    }
    
  37. Xavier Molero, Federico Silla, Vicente Santonja and Jose Duato. Performance analysis of storage area networks using high-speed LAN interconnects. 2000, 474 - 8. URL BibTeX

    @inproceedings{ 6783964,
    	author = "Molero, Xavier and Silla, Federico and Santonja, Vicente and Duato, Jose",
    	abstract = "Storage area networks (SANs) are an emerging data communications platform which interconnects servers and storage devices (such as disks, disk arrays, and tape drives) to create a pool of storage that users can access directly. SANs eliminate the bandwidth bottlenecks and scalability limitations imposed by previous SCSI bus-based architectures and LAN connections between servers and the stored data. This networking approach reports benefits such as computer clustering, topological flexibility, fault tolerance, high availability, and remote management. The prominent technology for implementing SANs is the fibre channel, due to the suitability of this technology for storage networking. Other technologies for high performance interconnects have also been developed. These interconnects provide switch-based networks with links transferring data at more than 1 Gigabit per second, being mainly used in the LAN environments. We analyze whether these high-speed LAN technologies could also be an interesting alternative to storage networking. We perform this analysis using real-world I/O traces. The main conclusion from our study is that most of the messages present the base network latency, meaning that the network is not heavily loaded. Moreover the response time is, in general, acceptable, being dominated by the time disks need to process the requests",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings IEEE International Conference on Networks 2000 (ICON 2000). Networking Trends and Challenges in the New Millennium",
    	doi = "10.1109/ICON.2000.875833",
    	keywords = "data communication;digital storage;disc storage;fault tolerance;LAN interconnection;network servers;network topology;performance evaluation;",
    	note = "storage area networks;high-speed LAN interconnects;performance analysis;data communications platform;servers interconnection;storage devices;disks;disk arrays;tape drives;computer clustering;topological flexibility;fault tolerance;high availability;fibre channel;switch-based networks;real-world I/O traces;network latency;response time;remote management;",
    	pages = "474--478",
    	title = "{P}erformance analysis of storage area networks using high-speed {LAN} interconnects",
    	url = "http://dx.doi.org/10.1109/ICON.2000.875833",
    	year = 2000
    }
    
  38. Jose Flich, M P Malumbres, Pedro Lopez and Jose Duato. Performance evaluation of a new routing strategy for irregular networks with source routing. 2000, 34 - 43. URL BibTeX

    @inproceedings{ 7144248,
    	author = "Flich, Jose and Malumbres, M. P. and Lopez, Pedro and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect processors using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. In some of these networks, messages are delivered using the up*/down* routing algorithm. However, the up*/down* routing scheme is often non-minimal. Also, some of these networks use source routing. With this technique, the entire path to destination is generated at the source host before the message is sent. In this paper we develop a new mechanism in order to improve the performance of irregular networks with source routing, increasing overall throughput. With this mechanism, messages always use minimal paths. To avoid possible deadlocks, when necessary, routes between a pair of hosts are divided into sub-routes, and a special kind of virtual cut-through is performed at some intermediate hosts. We evaluate the new mechanism by simulation using parameters taken from the Myrinet network. We show that the current routing schemes used in Myrinet can be improved by modifying only the routing software without increasing its overhead significantly and, most importantly, without modifying the network hardware. The benefits of using the new routing scheme are noticeable for networks with 16 or more switches, and increase with network size. For 32 and 64-switch networks, throughput is increased on average by a factor ranging from 1.3 to 3.3",
    	address = "New York, NY, USA",
    	booktitle = "Conference Proceedings of the 2000 International Conference on Supercomputing",
    	doi = "10.1145/335231.335235",
    	keywords = "multiprocessor interconnection networks;network routing;performance evaluation;",
    	note = "performance evaluation;routing strategy;irregular networks;source routing;networks of workstations;deadlocks;virtual cut-through;Myrinet network;routing software;wormhole switching;minimal routing;",
    	pages = "34--43",
    	title = "{P}erformance evaluation of a new routing strategy for irregular networks with source routing",
    	url = "http://dx.doi.org/10.1145/335231.335235",
    	year = 2000
    }
    
  39. Rafael Casado, Aurelio Bermudez, Francisco J Quiles, Jose L Sanchez and Jose Duato. Performance evaluation of dynamic reconfiguration in high-speed local area networks. 2000, 85 - 96. BibTeX

    @inproceedings{ 2002126889956,
    	author = "Casado, Rafael and Bermudez, Aurelio and Quiles, Francisco J. and Sanchez, Jose L. and Duato, Jose",
    	abstract = "A new deadlock-free distributed reconfiguration algorithm that is able to asynchronously update routing tables without stopping the user traffic is proposed. This algorithm is valid for any topology, including regular as well as irregular topologies. Simulation results show that the behavior of such algorithm is significantly better than for other algorithms based on a spanning-tree formation.",
    	address = "Toulouse, France",
    	booktitle = "IEEE High-Performance Computer Architecture Symposium Proceedings",
    	key = "Local area networks",
    	keywords = "Algorithms;Computer simulation;Computer system recovery;Distributed computer systems;Multimedia systems;Packet switching;Quality of service;Real time systems;",
    	note = "Dynamic reconfiguration;High speed networks;Network interface card;",
    	pages = "85--96",
    	title = "{P}erformance evaluation of dynamic reconfiguration in high-speed local area networks",
    	year = 2000
    }
    
  40. Xavier Molero, Federico Silla, Vicente Santonja and Jose Duato. Performance sensitivity of routing algorithms to failures in networks of workstations. 2000, 230 - 42. BibTeX

    @inproceedings{ 6977549,
    	author = "Molero, Xavier and Silla, Federico and Santonja, Vicente and Duato, Jose",
    	abstract = "Networks of workstations (NOW) are becoming an increasingly popular alternative to parallel computers for those applications with high needs of resources such as memory capacity and input/output storage space, and also for small-scale parallel computing. Although the mean time between failures (MTBF) for individual links and switches in a NOW is very high, the probability of a failure occurrence dramatically increases as the network size becomes larger. Moreover, there are external factors, such as accidental link disconnections, that also can affect the overall NOW reliability. Until the faulty element is replaced, the NOW is functioning in a degraded mode. Thus, it becomes necessary to quantify how much the global NOW performance is reduced during the time the system remains in this state. We analyze the performance degradation of networks of workstations when failures in links or switches occur. Because the routing algorithm is a key issue in the design of a NOW, we quantify the sensitivity to failures of two routing algorithms: up*/down* and minimal adaptive routing algorithms. Simulation results show that, in general, up*/down* routing is highly robust to failures. On the other hand, the minimal adaptive routing algorithm presents a better performance, even in the presence of failures, but at the expense of a larger sensitivity",
    	address = "Berlin, Germany",
    	booktitle = "High Performance Computing. Third International Symposium, ISHPC 2000. Proceedings (Lecture Notes in Computer Science Vol.1940)",
    	keywords = "computer network reliability;network routing;performance evaluation;probability;workstation clusters;",
    	note = "performance sensitivity;networks of workstations;NOW;small-scale parallel computing;mean time between failures;MTBF;failure probability;reliability;performance degradation;up*/down* routing algorithm;minimal adaptive routing algorithm;",
    	pages = "230--242",
    	title = "{P}erformance sensitivity of routing algorithms to failures in networks of workstations",
    	year = 2000
    }
    
  41. A J Sanchez and J M Martinez. Robot-arm pick and place behavior programming system using visual perception. In Pattern Recognition, 2000. Proceedings. 15th International Conference on 4. 2000, 507 -510 vol.4. URL, DOI BibTeX

    @inproceedings{ 902968,
    	author = "Sanchez, A. J. and Martinez, J. M.",
    	abstract = "This paper presents the programming of a robot-arm system for carrying out flexible pick and place behavior using visual perception. Object manipulation from visual data involves determining the pose of the object with respect to the manipulator. Taking into account that visual positioning is an ill-posed problem due to the perspective projection, this system uses a camera and a sensor distance, and both of them mounted on a robot-arm tool adapter for locating (positioning and orienting) objects. On the other hand, this programming system is modular, composed of different dynamic link libraries to be independent with the hardware and offers a friendly graphic interface where the user can define pick and place object locations on the image space",
    	booktitle = "Pattern Recognition, 2000. Proceedings. 15th International Conference on",
    	doi = "10.1109/ICPR.2000.902968",
    	keywords = "computer vision;graphical user interface;perspective projection;pick and place operation;position control;robot behavior programming;robot programming;visual positioning;calibration;computer vision;graphical user interfaces;industrial robots;position cont",
    	pages = "507--510",
    	title = "{R}obot-arm pick and place behavior programming system using visual perception",
    	url = "http://dx.doi.org/10.1109/ICPR.2000.902968",
    	volume = 4,
    	year = 2000
    }
    
  42. Julio Sahuquillo, Teresa Nachiondo, J -C Cano, J A Gil and A Pont. Self-similarity in SPLASH-2 workloads on shared memory multiprocessors systems. In Parallel and Distributed Processing, 2000. Proceedings. 8th Euromicro Workshop on. 2000, 293 -300. URL, DOI BibTeX

    @inproceedings{ 823423,
    	author = "Sahuquillo, Julio and Nachiondo, Teresa and Cano, J.-C. and Gil, J. A. and Pont, A.",
    	abstract = "The workload used for evaluating and obtaining performance results in shared memory multiprocessors are widely heterogeneous. Traces have been used over several decades and as computers systems grew in power, semantic benchmarks, like SPLASH2, became the most common workloads. Unfortunately, few benchmarks are available. Recently, self-similar studies have been performed in several computer domains. In this paper, we study the self-similar properties of several SPLASH2 benchmarks. Each benchmark has been studied independently, and all exhibit a clearly self-similar behaviour. The results enable the construction of a self-similar memory reference generator that makes a wide variety of parallel workload traces in a flexible manner; as well as quickly",
    	booktitle = "Parallel and Distributed Processing, 2000. Proceedings. 8th Euromicro Workshop on",
    	doi = "10.1109/EMPDP.2000.823423",
    	isbn = "0-7695-0500-7",
    	keywords = "SPLASH-2 workloads;parallel workload traces;self-similar memory reference generator;self-similarity;semantic benchmarks;shared memory multiprocessors systems;parallel processing;shared memory systems;software performance evaluation;system monitoring;",
    	pages = "293--300",
    	title = "{S}elf-similarity in {SPLASH}-2 workloads on shared memory multiprocessors systems",
    	url = "http://dx.doi.org/10.1109/EMPDP.2000.823423",
    	year = 2000
    }
    
  43. Elvira Baydal, Pedro Lopez and Jose Duato. Simple and efficient mechanism to prevent saturation in wormhole networks. Proceedings of the International Parallel Processing Symposium, IPPS, pages 617 - 622, 2000. BibTeX

    @inproceedings{ 2000265175264,
    	author = "Baydal, Elvira and Lopez, Pedro and Duato, Jose",
    	abstract = "Both deadlock avoidance and recovery techniques suffer from severe performance degradation when the network is close to or beyond saturation. This performance degradation appears because messages block in the network faster than they are drained by the escape paths in the deadlock avoidance strategies or the deadlock recovery mechanism. Many parallel applications produce bursty traffic that may saturate the network during some intervals, significantly increasing execution time. Therefore, the use of techniques that prevent network saturation are of crucial importance. Although several mechanisms have been proposed in the literature to reach this goal, some of them introduce some penalty when the network is not fully saturated, require complex hardware to be implemented or do not behave well under all network load conditions. In this paper, we propose a new mechanism to avoid network saturation that overcomes these drawbacks.",
    	address = "United States",
    	booktitle = "Proceedings of the International Parallel Processing Symposium, IPPS",
    	issn = "1063-7133",
    	key = "Parallel processing systems",
    	keywords = "Computer system recovery;Congestion control;Fault tolerant computer systems;Response time;Telecommunication traffic;",
    	note = "Deadlock recovery methods;Wormhole networks;",
    	pages = "617--622",
    	title = "{S}imple and efficient mechanism to prevent saturation in wormhole networks",
    	year = 2000
    }
    
  44. Young-Joo Suh, Binh Vien Dao, Jose Duato and Sudhakar Yalamanchili. Software-based rerouting for fault-tolerant pipelined communication. IEEE Transactions on Parallel and Distributed Systems 11(3):193 - 211, 2000. URL BibTeX

    @article{ 2000295197161,
    	author = "Suh, Young-Joo and Dao, Binh Vien and Duato, Jose and Yalamanchili, Sudhakar",
    	abstract = "This paper presents a software-based approach to fault-tolerant routing in networks using wormhole or virtual cut-through switching. When a message encounters a faulty output link, it is removed from the network by the local router and delivered to the messaging layer of the local node's operating system. The message passing software can reroute this message, possibly along nonminimal paths. Alternatively, the message may be addressed to an intermediate node, which will forward the message to the destination. A message may encounter multiple faults and pass through multiple intermediate nodes. The proposed techniques are applicable to both obliviously and adaptively routed networks. The techniques are specifically targeted toward commercial multiprocessors where the mean time to repair (MTTR) is much smaller than the mean time between router failures (MTBF), i.e., it is sufficient to tolerate a maximum of three failures. This paper presents requirements for buffer management, deadlock freedom, and livelock freedom. Simulation results are presented to evaluate the degradation in latency and throughput as a function of the number and distribution of faults. There are several advantages of such an approach. Router designs are minimally impacted, and thus remain compact and fast. Only messages that encounter faulty components are affected, while the machine is ensured of continued operation until the faulty components can be replaced. The technique leverages existing network technology, and the concepts are portable across evolving switch and router designs. Therefore, we feel that the technique is a good candidate for incorporation into the next generation of multiprocessor networks.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.841738",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Pipeline processing systems",
    	keywords = "Computer simulation;Computer software;Congestion control (communication);Fault tolerant computer systems;Routers;Telecommunication traffic;",
    	note = "Mean time between router failures (MTBF);Mean time to repair (MTTR);Wormhole switching;",
    	number = 3,
    	pages = "193--211",
    	title = "{S}oftware-based rerouting for fault-tolerant pipelined communication",
    	url = "http://dx.doi.org/10.1109/71.841738",
    	volume = 11,
    	year = 2000
    }
    
  45. Julio Sahuquillo and A Pont. Splitting the data cache: a survey. Concurrency, IEEE 8(3):30 -35, 2000. URL, DOI BibTeX

    @article{ 865890,
    	author = "Sahuquillo, Julio and Pont, A.",
    	abstract = "Recent cache-memory research has focused on approaches that split the first-level data cache into two independent subcaches. The authors introduce a methodology for helping cache designers devise splitting schemes and survey a representative set of the published cache schemes",
    	doi = "10.1109/4434.865890",
    	issn = "1092-3063",
    	journal = "Concurrency, IEEE",
    	keywords = "cache design;cache memory;cache splitting scheme design;first-level data cache splitting;independent subcaches;survey;cache storage;reviews;",
    	month = jul # "--" # sep,
    	number = 3,
    	pages = "30--35",
    	title = "{S}plitting the data cache: a survey",
    	url = "http://dx.doi.org/10.1109/4434.865890",
    	volume = 8,
    	year = 2000
    }
    
  46. D Love, S Yalamanchili, Jose Duato, M B Caminero and F J Quiles. Switch scheduling in the multimedia router (MMR). 2000, 5 - 11. URL BibTeX

    @inproceedings{ 6590288,
    	author = "Love, D. and Yalamanchili, S. and Duato, Jose and Caminero, M. B. and Quiles, F. J.",
    	abstract = "The primary goal of the Multimedia Router (MMR) project is the design and implementation of a router optimized for multimedia applications. The router is targeted for use in cluster and LAN interconnection networks which offer different constraints and therefore differing router solutions than WANs. This paper describes and evaluates a switch scheduling algorithm based on a priority biasing scheme for dynamically updating the priorities of the connections established through the router. Unlike existing schemes that simply use the age of a flit as its priority, the novel feature of the proposed approach is that the priority is biased using the measured quality of service (QoS) values for the connection. Furthermore, the structure of the switch scheduling algorithm is motivated by opportunities for pipelined and concurrent operation so that scheduling decisions could be made at switching speeds. The performance of two of the many possible biasing functions is evaluated",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 14th International Parallel and Distributed Processing Symposium. IPDPS 2000",
    	doi = "10.1109/IPDPS.2000.845958",
    	keywords = "LAN interconnection;local area networks;multimedia communication;",
    	note = "multimedia router;multimedia;cluster;LAN interconnection networks;switch scheduling;quality of service;",
    	pages = "5--11",
    	title = "{S}witch scheduling in the multimedia router ({MMR})",
    	url = "http://dx.doi.org/10.1109/IPDPS.2000.845958",
    	year = 2000
    }
    
  47. D Love, S Yalamanchili, Jose Duato, M B Caminero and F J Quiles. Switch scheduling in the Multimedia Router (MMR). Proceedings of the International Parallel Processing Symposium, IPPS, pages 5 - 11, 2000. BibTeX

    @inproceedings{ 2000265175186,
    	author = "Love, D. and Yalamanchili, S. and Duato, Jose and Caminero, M. B. and Quiles, F. J.",
    	abstract = "The primary goal of the Multimedia Router (MMR) project is the design and implementation of a router optimized for multimedia applications. The router is targeted for use in cluster and LAN interconnection networks which offer different constraints and therefore differing router solutions than WANs. This paper describes and evaluates a switch scheduling algorithm based on a priority biasing scheme for dynamically updating the priorities of the connections established through the router. Unlike existing schemes that simply use the age of a flit as its priority, the novel feature of the proposed approach is that the priority is biased using the measured quality of service (QoS) values for the connection. Furthermore, the structure of the switch scheduling algorithm is motivated by opportunities for pipelined and concurrent operation so that scheduling decisions could be made at switching speeds. The performance of two of the many possible biasing functions is evaluated.",
    	address = "United States",
    	booktitle = "Proceedings of the International Parallel Processing Symposium, IPPS",
    	issn = "1063-7133",
    	key = "Multimedia systems",
    	keywords = "Algorithms;Data communication systems;Interconnection networks;Local area networks;Pipeline processing systems;Routers;Telecommunication services;",
    	note = "Multimedia routers (MMR);Switch scheduling;",
    	pages = "5--11",
    	title = "{S}witch scheduling in the {M}ultimedia {R}outer ({MMR})",
    	year = 2000
    }
    
  48. J -C Cano, A Pont, Julio Sahuquillo and J A Gil. The differences between distributed shared memory caching and proxy caching. Concurrency, IEEE 8(3):45 -47, 2000. URL, DOI BibTeX

    @article{ 865892,
    	author = "Cano, J.-C. and Pont, A. and Sahuquillo, Julio and Gil, J. A.",
    	abstract = "The authors discuss the similarities in caching between the extensively studied distributed shared memory systems and the emerging proxy systems. They believe that several of the techniques used in distributed shared memory systems can be adapted and applied to proxy systems",
    	doi = "10.1109/4434.865892",
    	issn = "1092-3063",
    	journal = "Concurrency, IEEE",
    	keywords = "caching;distributed shared memory systems;proxy systems;cache storage;distributed shared memory systems;",
    	month = jul # "--" # sep,
    	number = 3,
    	pages = "45--47",
    	title = "{T}he differences between distributed shared memory caching and proxy caching",
    	url = "http://dx.doi.org/10.1109/4434.865892",
    	volume = 8,
    	year = 2000
    }
    
  49. Ruoming Pang, T M Pinkston and Jose Duato. The double scheme: deadlock-free dynamic reconfiguration of cut-through networks. 2000, 439 - 48. URL BibTeX

    @inproceedings{ 6742429,
    	author = "Pang, Ruoming and Pinkston, T. M. and Duato, Jose",
    	abstract = "Network-based computing systems often require the ability to reconfigure the routing algorithm to reflect changes in network topology if and when those changes occur. The process of reconfiguring a network's routing capabilities may lead to deadlock if not handled properly. In this paper we propose efficient and deadlock-free dynamic reconfiguration techniques that are generically applicable to distributed routing algorithms and networks, including those which use wormhole switching. The proposed techniques do not impede the transmission of packets during the reconfiguration process, thus providing increased network availability and quality-of-service (QoS) support as compared to traditional techniques based on static reconfiguration",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 2000 International Conference on Parallel Processing",
    	doi = "10.1109/ICPP.2000.876160",
    	keywords = "concurrency control;local area networks;multiprocessor interconnection networks;reconfigurable architectures;",
    	note = "deadlock-free;dynamic reconfiguration;cut-through networks;network topology;distributed routing;wormhole switching;",
    	pages = "439--448",
    	title = "{T}he double scheme: deadlock-free dynamic reconfiguration of cut-through networks",
    	url = "http://dx.doi.org/10.1109/ICPP.2000.876160",
    	year = 2000
    }
    
  50. J-C Cano, Teresa Nachiondo, Julio Sahuquillo, A Pont and J A Gil. WWW client/server traffic characterization: a proxy server point of view. In System Sciences, 2000. Proceedings of the 33rd Annual Hawaii International Conference on. 2000, 10 pp. URL, DOI BibTeX

    @conference{ 926874,
    	author = "Cano, J.-C. and Nachiondo, Teresa and Sahuquillo, Julio and Pont, A. and Gil, J. A.",
    	abstract = "When performance studies about proxy cache server systems are made, one of the most common difficulties is to identify and to obtain representative workloads. Traces have been used as traditional workload. Gathering traces imply a large amount of time. If a self-similar traffic generator could be used, this problem would be solved, therefore evaluation studies become faster and more flexible. This work contains two parts; first, we perform a study of the self-similar property of several characteristics of the arrival collected traces, such as response size pattern, elapsed request time pattern and so on. Secondly, we model a source and develop a self-similar traffic arrival pattern generator.",
    	booktitle = "System Sciences, 2000. Proceedings of the 33rd Annual Hawaii International Conference on",
    	doi = "10.1109/HICSS.2000.926874",
    	isbn = "0-7695-0493-0",
    	keywords = "World Wide Web; client server traffic; elapsed request time pattern; performance studies; proxy cache server systems; response size pattern; self-similar traffic generator; traces; workloads; Internet; client-server systems; information resources; teleco",
    	month = jan,
    	pages = "10 pp.",
    	title = "{WWW} client/server traffic characterization: a proxy server point of view",
    	url = "http://dx.doi.org/10.1109/HICSS.2000.926874",
    	year = 2000
    }
    
  51. Jose Duato, Antonio Robles, Federico Silla and R Beivide. A comparison of router architectures for virtual cut-through and wormhole switching in a NOW environment. 1999, 240 - 7. URL BibTeX

    @conference{ 6245442,
    	author = "Duato, Jose and Robles, Antonio and Silla, Federico and Beivide, R.",
    	abstract = "Most commercial routers designed for networks of workstations (NOWs) implement wormhole switching. However wormhole switching is not well suited for NOWs. The long wires required in this environment lead to large buffers to prevent buffer overflow during flow control signaling. Moreover, wire length is limited by buffer size. Virtual cut-through (VCT) achieves a higher throughput than wormhole switching. Moreover, the traditional disadvantages of VCT switching, as buffer requirements and packetizing overhead, disappear in NOWs. In this paper, we show that VCT routers can be simpler than wormhole ones, while still achieving the advantages of using virtual channels and adaptive routing. We also propose a fully adaptive routing algorithm for VCT switching in NOWs. Moreover, we show that VCT routers outperform wormhole routers in a NOW environment at a lower cost",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 13th International Parallel Processing Symposium and 10th Symposium on Parallel and Distributed Processing. IPPS/SPDP 1999",
    	doi = "10.1109/IPPS.1999.760469",
    	keywords = "multiprocessor interconnection networks;network routing;workstation clusters;",
    	note = "router architectures;virtual cut-through;wormhole switching;NOW environment;networks of workstations;buffer requirements;packetizing overhead;VCT routers;",
    	pages = "240--247",
    	title = "{A} comparison of router architectures for virtual cut-through and wormhole switching in a {NOW} environment",
    	url = "http://dx.doi.org/10.1109/IPPS.1999.760469",
    	year = 1999
    }
    
  52. Marina Alonso and Vicente Santonja. A new destage algorithm for disk cache: DOME. In EUROMICRO Conference, 1999. Proceedings. 25th vol.1. 1999, 416 - 23. URL, DOI BibTeX

    @conference{ 6364156,
    	author = "Alonso, Marina and Santonja, Vicente",
    	abstract = "Microprocessor technology is advancing at an incredible rate, unfortunately disk technology is not increasing at the same rate; thus, disk subsystem performance is becoming a dominant factor in the overall system behaviour. Disk caches have been effective to improve I/O performance. Several studies show that disk access patterns are dominated by write operations, then the use of non-volatile write caches together with a write-behind strategy seems to be essential. In this paper, a new destage algorithm for disk write caches, called DOME (Destage Only Modified oncE), is presented. Using simulation, DOME is compared with other preceding algorithms such as the threshold scheduling, providing better response time and behaviour, not only for write requests, but also for read requests. These results have been obtained feeding the simulator with real traces",
    	booktitle = "EUROMICRO Conference, 1999. Proceedings. 25th",
    	doi = "10.1109/EURMIC.1999.794503",
    	isbn = "0-7695-0321-7",
    	journal = "Proceedings 25th EUROMICRO Conference. Informatics: Theory and Practice for the New Millennium",
    	keywords = "cache storage;magnetic disc storage;",
    	month = sep,
    	note = "destage algorithm;disk cache;DOME;microprocessor technology;disk subsystem performance;system behaviour;disk access patterns;write operations;destage only modified once;threshold scheduling;real traces;",
    	pages = "416--423",
    	publisher = "IEEE Computer Society Press",
    	title = "{A} new destage algorithm for disk cache: {DOME}",
    	url = "http://dx.doi.org/10.1109/EURMIC.1999.794503",
    	volume = 1,
    	year = 1999
    }
    
  53. V Puente, R Beivide, J A Gregorio, J M Prellezo, Jose Duato and C Izu. Adaptive bubble router: a design to improve performance in torus networks. 1999, 58 - 67. URL BibTeX

    @conference{ 6397188,
    	author = "Puente, V. and Beivide, R. and Gregorio, J. A. and Prellezo, J. M. and Duato, Jose and Izu, C.",
    	abstract = "A router design for torus networks that significantly reduces message latency over traditional wormhole routers is presented in this paper. This new router implements virtual cut-through switching and fully-adaptive minimal routing. Packet deadlock is avoided by providing escape ways governed by Bubble flow control, a mechanism that guarantees enough free buffer space in the network to allow continuous packet movement. Both deterministic and adaptive Bubble routers have been designed in VLSI using VHDL synthesis tools. Adopting a fair quantitative comparison, we demonstrate that Bubble routers exhibit a reduction in base latency values over 40% with respect to the corresponding wormhole routers, without any penalty in network throughput. With much lower VLSI costs than adaptive wormhole routers, the adaptive Bubble router is even faster than deterministic wormhole routers based on virtual channels",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the 1999 International Conference on Parallel Processing",
    	doi = "10.1109/ICPP.1999.797388",
    	keywords = "multiprocessor interconnection networks;network routing;performance evaluation;VLSI;",
    	note = "adaptive bubble router;performance;torus networks;router design;message latency;virtual cut-through switching;fully-adaptive minimal routing;VLSI;VHDL synthesis tools;base latency values;virtual channels;",
    	pages = "58--67",
    	title = "{A}daptive bubble router: a design to improve performance in torus networks",
    	url = "http://dx.doi.org/10.1109/ICPP.1999.797388",
    	year = 1999
    }
    
  54. Maria E Gomez and Vicente Santonja. Analysis of self-similarity in I/O workload using structural modeling. In Modeling, Analysis and Simulation of Computer and Telecommunication Systems, 1999. Proceedings. 7th International Symposium on. 1999, 234 - 42. URL, DOI BibTeX

    @conference{ 6512239,
    	author = "Gomez, Maria E. and Santonja, Vicente",
    	abstract = "Demonstrates that disk-level I/O requests are self-similar in nature. We show evidence (both visual and mathematical) that I/O accesses are consistent with self-similarity. For this analysis, we have used two sets of disk activity traces collected from various systems over different periods of time. In addition to studying the aggregated I/O workload that is directed to the storage system, we perform a structural modeling of the workload in order to understand the underlying causes that produce the observed self-similarity. This structural modeling shows that self-similar behavior can be explained by combining two different approaches: the on/off source model and Cox's model. The former applies to those processes that remain active during the whole trace, while the latter applies to sources that show a very short activity time",
    	booktitle = "Modeling, Analysis and Simulation of Computer and Telecommunication Systems, 1999. Proceedings. 7th International Symposium on",
    	doi = "10.1109/MASCOT.1999.805060",
    	isbn = "0-7695-0381-0",
    	journal = "MASCOTS '99. Proceedings of the Seventh International Symposium on Modeling, Analysis and Simulation of Computer and Telecommunication Systems",
    	keywords = "disc storage;fractals;input-output programs;performance evaluation;",
    	month = oct,
    	note = "self-similarity;I/O workload;structural modeling;disk-level I/O requests;I/O accesses;disk activity traces;storage system;on/off source model;Cox's model;activity time;",
    	pages = "234--242",
    	publisher = "IEEE Computer Society",
    	title = "{A}nalysis of self-similarity in {I}/{O} workload using structural modeling",
    	url = "http://dx.doi.org/10.1109/MASCOT.1999.805060",
    	year = 1999
    }
    
  55. Jose Duato, Antonio Robles, Federico Silla and R Beivide. Comparison of router architectures for virtual cut-through and wormhole switching in a NOW environment. Proceedings of the International Parallel Processing Symposium, IPPS, pages 240 - 247, 1999. BibTeX

    @article{ 1999394752205,
    	author = "Duato, Jose and Robles, Antonio and Silla, Federico and Beivide, R.",
    	abstract = "Most commercial routers designed for networks of workstations (NOWs) implement wormhole switching. However, wormhole switching is not well suited for NOWs. The long wires required in this environment lead to large buffers to prevent buffer overflow during flow control signaling. Moreover, wire length is limited by buffer size. Virtual cut-through (VCT) achieves a higher throughput than wormhole switching. Moreover, the traditional disadvantages of VCT switching, as buffer requirements and packetizing overhead, disappear in NOWs. In this paper, we show that VCT routers can be simpler than wormhole ones, while still achieving the advantages of using virtual channels and adaptive routing. We also propose a fully adaptive routing algorithm for VCT switching in NOWs. Moreover, we show that VCT routers outperform wormhole routers in a NOW environment at a lower cost.",
    	address = "San Juan",
    	issn = "1063-7133",
    	journal = "Proceedings of the International Parallel Processing Symposium, IPPS",
    	key = "Pipeline processing systems",
    	keywords = "Adaptive algorithms;Computer architecture;Computer workstations;Switching networks;",
    	note = "Virtual cut-through (VCT);Wormhole switching;",
    	pages = "240--247",
    	title = "{C}omparison of router architectures for virtual cut-through and wormhole switching in a {NOW} environment",
    	year = 1999
    }
    
  56. R Casado, F J Quiles, J L Sanchez and Jose Duato. Deadlock-free routing in irregular networks with dynamic reconfiguration. 1999, 165 - 80. BibTeX

    @conference{ 6429577,
    	author = "Casado, R. and Quiles, F. J. and Sanchez, J. L. and Duato, Jose",
    	abstract = "High-speed local area networks (LANs) support many distributed applications, These applications require some system availability guarantees. However, LANs may change their topology due to switches and hosts being turned on/off, link remapping, and component failures. In these cases, a distributed reconfiguration algorithm is executed. This algorithm analyzes the topology, computes the new routing tables, and downloads them to the corresponding switches. Unfortunately, in most cases user traffic is stopped during the reconfiguration process to avoid deadlock. Although network reconfigurations are not frequent, they may take hundreds of milliseconds to execute, thus degrading system availability significantly. In this paper, we propose a new deadlock-free distributed reconfiguration algorithm that is able to asynchronously update the routing tables without stopping user traffic. This dynamic reconfiguration algorithm is valid for any topology, including regular as well as irregular topologies",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. Third International Workshop, CANPC'99 Proceedings",
    	keywords = "concurrency control;local area networks;reconfigurable architectures;telecommunication network routing;",
    	note = "local area networks;distributed reconfiguration;deadlock;distributed reconfiguration algorithm;routing tables;dynamic reconfiguration;irregular networks;deadlock-free routing;",
    	pages = "165--180",
    	title = "{D}eadlock-free routing in irregular networks with dynamic reconfiguration",
    	year = 1999
    }
    
  57. Binh Vien Dao, Jose Duato and Sudhakar Yalamanchili. Dynamically configurable message flow control for fault-tolerant routing. IEEE Transactions on Parallel and Distributed Systems 10(1):7 - 22, 1999. URL BibTeX

    @article{ 1999160018138,
    	author = "Dao, Binh Vien and Duato, Jose and Yalamanchili, Sudhakar",
    	abstract = "Fault-tolerant routing protocols in modern interconnection networks rely heavily on the network flow control mechanisms used. Optimistic flow control mechanisms, such as wormhole switching (WS), realize very good performance, but are prone to deadlock in the presence of faults. Conservative flow control mechanisms, such as pipelined circuit switching (PCS), ensure the existence of a path to the destination prior to message transmission, achieving reliable transmission at the expense of performance. This paper proposes a general class of flow control mechanisms that can be dynamically configured to trade-off reliability and performance. Routing protocols can then be designed such that, in the vicinity of faults, protocols use a more conservative flow control mechanism, while the majority of messages that traverse fault-free portions of the network utilize a WS like flow control to maximize performance. We refer to such protocols as two-phase protocols. This ability provides new avenues for optimizing message passing performance in the presence of faults. A fully adaptive two-phase protocol is proposed, and compared via simulation to those based on WS and PCS. The architecture of a network router supporting configurable flow control is also described.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.744829",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Interconnection networks",
    	keywords = "Communication channels;Computer system recovery;Data communication systems;Fault tolerant computer systems;Network protocols;Pipeline processing systems;Virtual reality;",
    	note = "Message flow control;Pipelined circuit switching;Wormhole switching;",
    	number = 1,
    	pages = "7--22",
    	title = "{D}ynamically configurable message flow control for fault-tolerant routing",
    	url = "http://dx.doi.org/10.1109/71.744829",
    	volume = 10,
    	year = 1999
    }
    
  58. J M Martinez, Pedro Lopez and Jose Duato. Impact of buffer size on the efficiency of deadlock detection. 1999, 315 - 18. URL BibTeX

    @conference{ 6169109,
    	author = "Martinez, J. M. and Lopez, Pedro and Duato, Jose",
    	abstract = "Deadlock detection is one of the most important design issues in recovery strategies for routing in interconnection networks. In a previous paper, we presented an efficient deadlock detection mechanism. This mechanism requires that when a message header blocks it must be quickly notified to all the channels reserved by that message. To achieve this goal, the detection mechanism uses the information provided by flow control. Some recent commercial multiprocessors use deep buffers, since they may increase network throughput and efficiently allow transmission over long wires. However, deep buffers may increase the elapsed time between header blocking at a router and the propagation of flow control signals, thus negatively affecting the behavior of our deadlock detection mechanism. On the other hand, deeper buffers reduce deadlock frequency. As a consequence, buffer size has opposing effects on deadlock detection. In this paper, we analyze by simulation the influence of these effects on the efficiency of our deadlock detection mechanism, showing that overall performance improves with buffer size",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings Fifth International Symposium on High-Performance Computer Architecture",
    	doi = "10.1109/HPCA.1999.744385",
    	keywords = "concurrency control;multiprocessor interconnection networks;",
    	note = "buffer size;deadlock detection;recovery strategies;interconnection networks routing;multiprocessors;deep buffers;simulation;",
    	pages = "315--318",
    	title = "{I}mpact of buffer size on the efficiency of deadlock detection",
    	url = "http://dx.doi.org/10.1109/HPCA.1999.744385",
    	year = 1999
    }
    
  59. J F Martinez, J Torrellas and Jose Duato. Improving the performance of bristled CC-NUMA systems using virtual channels and adaptivity. 1999, 202 - 9. URL BibTeX

    @conference{ 6734273,
    	author = "Martinez, J. F. and Torrellas, J. and Duato, Jose",
    	abstract = "Current high-end parallel systems achieve low-latency, high-bandwidth network communication through the use of aggressive design techniques and expensive mechanical and electrical parts. High-speed interconnection networks, which are crucial to achieving acceptable system performance, may account for an important fraction of the total cost of the machine. To reduce the network cost and still maintain scalability, bristled configurations, in which each router connects to several processing nodes, pose an attractive alternative. Their lower bandwidth, however, may adversely affect the efficiency of the parallel codes. We show how virtual channels and adaptive routing can make bristled systems more attractive: overall performance improves in congested scenarios while remaining practically unaltered under light traffic conditions. Experimental results are obtained by using execution-driven simulation of a complete state-of-the-art CC-NUMA system, with dynamic superscalar processors and contemporary pipelined routers. The results show that, in bristled hypercubes with 2 processing nodes per router, SPLASH-2 applications with significant communication run 5-15% faster if we make use of virtual channels and adaptive routing. The resulting systems are only 1-10% slower than systems with non-bristled hypercubes and similar routing support, even though the former only need about half of the network hardware components present in the latter. Additionally, virtual channels and adaptivity are shown to be of negligible effect in non-bristled hypercubes",
    	address = "New York, NY, USA",
    	booktitle = "Conference Proceedings of the 1999 International Conference on Supercomputing",
    	doi = "10.1145/305138.305194",
    	keywords = "discrete event simulation;distributed shared memory systems;hypercube networks;network routing;parallel architectures;performance evaluation;pipeline processing;",
    	note = "bristled CC-NUMA systems;virtual channels;high-end parallel systems;low-latency network communication;high-speed interconnection networks;system performance;network cost;scalability;processing nodes;parallel code efficiency;adaptive routing;congestion;traffic conditions;execution-driven simulation;dynamic superscalar processors;contemporary pipelined routers;bristled hypercubes;SPLASH-2 applications;",
    	pages = "202--209",
    	title = "{I}mproving the performance of bristled {CC}-{NUMA} systems using virtual channels and adaptivity",
    	url = "http://dx.doi.org/10.1145/305138.305194",
    	year = 1999
    }
    
  60. Federico Silla and Jose Duato. Is it worth the flexibility provided by irregular topologies in networks of workstations? 1999, 47 - 61. BibTeX

    @conference{ 6439234,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming a cost-effective alternative for small-scale parallel computing. Usually, NOWs present an irregular topology as a consequence of the needs in a local area network. Routing algorithms used in NOWs are inherently different from those used in regular networks, mainly due to the irregular connections between switches. In these algorithms, routing is considerably restricted in order to avoid deadlocks. Recently, a general methodology for the design of adaptive routing algorithms for irregular networks has been proposed by the authors. The resulting algorithms increase the maximum achievable throughput while reducing message latency. In this paper, we study how much network performance we are losing due to the irregular topology of NOWs. We analyze the performance of the up*/down* routing algorithm in a 2D mesh topology and compare it with the performance achieved by the XY routing scheme in the same network, in order to answer the following two questions: 1) in a 2D mesh, which of the two routing algorithms achieves better performance?, and 2) where does the up*/down* routing algorithm work better, in a 2D mesh or in an irregular network? Simulation results show that the up*/down* routing strategy performs better in a regular network than in an irregular one. On the other hand, the XY routing algorithm considerably outperforms the up*/down* scheme. However, when the adaptive routing algorithm proposed by the authors is used, differences in performance are much smaller. Thus, the higher performance of a regular topology could not compensate for the loss in wiring flexibility with respect to irregular networks, or their capability of adding a single switch at any moment",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. Third International Workshop, CANPC'99 Proceedings",
    	keywords = "multiprocessor interconnection networks;network routing;network topology;performance evaluation;workstation clusters;",
    	note = "NOWs;networks of workstations;irregular topology;local area network;routing;adaptive routing algorithms;2D mesh topology;performance;",
    	pages = "47--61",
    	title = "{I}s it worth the flexibility provided by irregular topologies in networks of workstations?",
    	year = 1999
    }
    
  61. Jose Duato, Sudhakar Yalamanchili, M Blanca Caminero, Damon Love and Francisco J Quiles. MMR: A high-performance multimedia router - architecture and design trade-offs. 1999, 300 - 309. BibTeX

    @conference{ 1999164582264,
    	author = "Duato, Jose and Yalamanchili, Sudhakar and Caminero, M. Blanca and Love, Damon and Quiles, Francisco J.",
    	abstract = "This paper presents the architecture of a router designed to efficiently support traffic generated by multimedia applications. The router is targeted for use in clusters and LANs rather than in WANs, the latter being served by communication substrates such as ATM. The distinguishing features of the proposed router architecture are the use of small fixed-size buffers, a large number of virtual channels, link-level virtual channel flow control, support for dynamic modification of connection bandwidth and priorities, and coordinated scheduling of connections across all output channels. The paper begins with a discussion of the design choices and architectural trade-offs made in the current MultiMedia Router (MMR) project. The performance evaluation section presents some preliminary results of the coordinated scheduling of constant bit rate (CBR) traffic streams.",
    	address = "Orlando, FL, USA",
    	booktitle = "IEEE High-Performance Computer Architecture Symposium Proceedings",
    	key = "Computer architecture",
    	keywords = "Bandwidth;Communication channels (information theory);Congestion control (communication);Local area networks;Multimedia systems;Routers;Telecommunication traffic;Wide area networks;",
    	note = "Constant bit rate (CBR);Coordination scheduling;Multimedia routers (MMR);",
    	pages = "300--309",
    	title = "{MMR}: {A} high-performance multimedia router - architecture and design trade-offs",
    	year = 1999
    }
    
  62. Jose Duato, S Yalamanchili, M B Caminero, D Love and F J Quiles. MMR: a high-performance MultiMedia Router-architecture and design trade-offs. 1999, 300 - 9. URL BibTeX

    @conference{ 6169107,
    	author = "Duato, Jose and Yalamanchili, S. and Caminero, M. B. and Love, D. and Quiles, F. J.",
    	abstract = "This paper presents the architecture of a router designed to efficiently support traffic generated by multimedia applications. The router is targeted for use in clusters and LANs rather than in WANs, the latter being served by communication substrates such as ATM. The distinguishing features of the proposed router architecture are the use of small fixed-size buffers, a large number of virtual channels, link-level virtual channel flow control, support for dynamic modification of connection bandwidth and priorities, and coordinated scheduling of connections across all output channels. The paper begins with a discussion of the design choices and architectural trade-offs made in the current MultiMedia Router (MMR) project. The performance evaluation section presents some preliminary results of the coordinated scheduling of constant bit rate (CBR) traffic streams",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings Fifth International Symposium on High-Performance Computer Architecture",
    	doi = "10.1109/HPCA.1999.744383",
    	keywords = "local area networks;multimedia systems;multiprocessor interconnection networks;performance evaluation;",
    	note = "MMR;high-performance multimedia router;LANs;ATM;virtual channels;performance evaluation;coordinated scheduling;constant bit rate traffic streams;",
    	pages = "300--309",
    	title = "{MMR}: a high-performance {M}ulti{M}edia {R}outer-architecture and design trade-offs",
    	url = "http://dx.doi.org/10.1109/HPCA.1999.744383",
    	year = 1999
    }
    
  63. Pedro Lopez, Rosa Alcover, Jose Duato and L Zunica. Optimizing network throughput: optimal versus robust design. In Parallel and Distributed Processing, 1999. PDP '99. Proceedings of the Seventh Euromicro Workshop on. February 1999, 45 -52. URL, DOI BibTeX

    @conference{ 746644,
    	author = "Lopez, Pedro and Alcover, Rosa and Duato, Jose and Zunica, L.",
    	abstract = "Interconnection network performance is usually measured in terms of its latency (time required to deliver a message) and throughput (maximum traffic accepted by the network). At first glance, minimizing average message latency is the main designer goal, because average network traffic is usually far from saturation. However, applications can also generate very high peak traffic. In order to deal with such situations, it is important that network throughput is also high. On the other hand, interconnection network performance depends on several parameters. Some of them can be chosen by the designer: routing algorithm, switching technique, topology and node design parameters. However, there are other parameters that cannot be selected by the designer. Among these, there are parameters that depend on the application, such as message size, message destination distribution and message traffic, as well as parameters defined by the customer, such as network size. Network designer can select the design parameters that maximize average (optimal design) or the design parameters that achieve a good performance under all the feasible combinations of the parameters that cannot be selected by him (robust design). Notice that both alternatives do not always lead to the same parameter configuration. Previously we chose the design parameters of a k-ary n-cube network considering optimize latency. In this case, optimal and robust design lead to the same choice. In this paper, we obtain these design parameters considering optimized network throughput. Unfortunately, there is a discrepancy between optimal and robust design criteria, being the former the best choice",
    	booktitle = "Parallel and Distributed Processing, 1999. PDP '99. Proceedings of the Seventh Euromicro Workshop on",
    	doi = "10.1109/EMPDP.1999.746644",
    	isbn = "0-7695-0059-5",
    	issn = "1066-6192",
    	keywords = "average message latency;average network traffic;interconnection network performance;latency;message destination distribution;network throughput optimisation;node design parameters;optimal design;parameter configuration;robust design;routing algorithm;swit",
    	month = feb,
    	pages = "45--52",
    	title = "{O}ptimizing network throughput: optimal versus robust design",
    	url = "http://dx.doi.org/10.1109/EMPDP.1999.746644",
    	year = 1999
    }
    
  64. Pedro Lopez, Rosa Alcover, Jose Duato and L Zunica. Optimizing network throughput: optimal versus robust design. 1999, 45 - 52. URL BibTeX

    @conference{ 6169182,
    	author = "Lopez, Pedro and Alcover, Rosa and Duato, Jose and Zunica, L.",
    	abstract = "Interconnection network performance is usually measured in terms of its latency (time required to deliver a message) and throughput (maximum traffic accepted by the network). At first glance, minimizing average message latency is the main designer goal, because average network traffic is usually far from saturation. However, applications can also generate very high peak traffic. In order to deal with such situations, it is important that network throughput is also high. On the other hand, interconnection network performance depends on several parameters. Some of them can be chosen by the designer: routing algorithm, switching technique, topology and node design parameters. However, there are other parameters that cannot be selected by the designer. Among these, there are parameters that depend on the application, such as message size, message destination distribution and message traffic, as well as parameters defined by the customer, such as network size. Network designer can select the design parameters that maximize average (optimal design) or the design parameters that achieve a good performance under all the feasible combinations of the parameters that cannot be selected by him (robust design). Notice that both alternatives do not always lead to the same parameter configuration. Previously we chose the design parameters of a k-ary n-cube network considering optimize latency. In this case, optimal and robust design lead to the same choice. In this paper, we obtain these design parameters considering optimized network throughput. Unfortunately, there is a discrepancy between optimal and robust design criteria, being the former the best choice",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Seventh Euromicro Workshop on Parallel and Distributed Processing. PDP'99",
    	doi = "10.1109/EMPDP.1999.746644",
    	keywords = "multiprocessor interconnection networks;performance evaluation;telecommunication network routing;",
    	note = "network throughput optimisation;robust design;optimal design;interconnection network performance;latency;average message latency;average network traffic;routing algorithm;switching technique;node design parameters;message destination distribution;parameter configuration;",
    	pages = "45--52",
    	title = "{O}ptimizing network throughput: optimal versus robust design",
    	url = "http://dx.doi.org/10.1109/EMPDP.1999.746644",
    	year = 1999
    }
    
  65. Rafael Casado, Aurelio Bermudez, Francisco J Quiles, Jose L Sanchez and Jose Duato. Performance evaluation of dynamic reconfiguration in high-speed local area networks. 1999, 85 - 96. BibTeX

    @conference{ 2000215131150,
    	author = "Casado, Rafael and Bermudez, Aurelio and Quiles, Francisco J. and Sanchez, Jose L. and Duato, Jose",
    	abstract = "A new deadlock-free distributed reconfiguration algorithm that is able to asynchronously update routing tables without stopping user traffic is proposed. The algorithm is valid for any topology, including regular as well as irregular topologies. Simulation results show that the behavior of the algorithm is significantly better than for other algorithms based on a spanning-tree formation.",
    	address = "Toulouse, France",
    	booktitle = "IEEE High-Performance Computer Architecture Symposium Proceedings",
    	key = "Local area networks",
    	keywords = "Algorithms;Computer simulation;Computer system recovery;Congestion control (communication);Distributed computer systems;Electric network topology;Multimedia systems;Packet switching;Performance;Telecommunication traffic;",
    	note = "Deadlock free distributed reconfiguration algorithm;Network interface card;Quality of service;Spanning tree formation;",
    	pages = "85--96",
    	title = "Performance evaluation of dynamic reconfiguration in high-speed local area networks",
    	year = 1999
    }
    
  66. R Casado, A Bermudez, F J Quiles, J L Sanchez and Jose Duato. Performance evaluation of dynamic reconfiguration in high-speed local area networks. 1999, 85 - 96. URL BibTeX

    @conference{ 6498655,
    	author = "Casado, R. and Bermudez, A. and Quiles, F. J. and Sanchez, J. L. and Duato, Jose",
    	abstract = "High-speed local area networks (LANs) consist of a set of switches connected by point-to-point links, and hosts linked to switches through a network interface card. High-speed LANs may change their topology due to switches and hosts being turned on/off, link remapping, and component failures. In these cases, a distributed reconfiguration algorithm analyzes the topology, computes the new routing tables, and downloads them to the corresponding switches. Unfortunately, in most cases, user traffic is stopped during the reconfiguration process to avoid deadlock. Although network reconfigurations are not frequent, static reconfiguration such as this may take hundreds of milliseconds to execute, thus degrading system availability significantly. In this paper, we propose a new deadlock-free distributed reconfiguration algorithm that is able to asynchronously update routing tables without stopping user traffic. This algorithm is valid for any topology, including regular as well as irregular topologies. Simulation results show that the behavior of our algorithm is significantly better than for other algorithms based on a spanning-tree formation",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings Sixth International Symposium on High-Performance Computer Architecture. HPCA-6 (Cat. No.PR00550)",
    	doi = "10.1109/HPCA.2000.824341",
    	keywords = "concurrency control;digital simulation;local area networks;performance evaluation;quality of service;system recovery;",
    	note = "performance evaluation;dynamic reconfiguration;high-speed local area networks;point-to-point links;network interface card;link remapping;component failures;distributed reconfiguration algorithm;deadlock;simulation results;spanning-tree formation;",
    	pages = "85--96",
    	title = "Performance evaluation of dynamic reconfiguration in high-speed local area networks",
    	url = "http://dx.doi.org/10.1109/HPCA.2000.824341",
    	year = 1999
    }
    
  67. Jose Flich, M P Malumbres, Pedro Lopez and Jose Duato. Performance evaluation of networks of workstations with hardware shared memory model using execution-driven simulation. In Parallel Processing, 1999. Proceedings. 1999 International Conference on. 1999, 146 -153. DOI BibTeX

    @conference{ 797399,
    	author = "Flich, Jose and Malumbres, M. P. and Lopez, Pedro and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect processors using irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. Similar to the evolution of parallel computers, NOWs are also evolving from distributed memory to shared memory programming model. However, physical distances between processors are longer in NOWs than in tightly-coupled distributed shared-memory multiprocessors (DSMs), leading to higher message latency and lower network bandwidth. Therefore, the network may be a bottleneck when executing some parallel applications in a NOW supporting a shared-memory programming paradigm. In this paper we analyze whether the interconnection network is able to efficiently handle the traffic generated in a NOW with the shared memory model. In particular, we are interested in analyzing the influence of the routing mechanism in the performance of the system. We evaluate the behavior of a NOW with irregular topology by means of an execution-driven simulator using SPLASH-2 applications as the input load. The results show that the routing algorithm can considerably reduce the total execution time of applications. In particular routing adaptivity can reduce the total execution time by 58% in some applications. These results confirm the behavior observed in previous works using synthetic traffic loads",
    	booktitle = "Parallel Processing, 1999. Proceedings. 1999 International Conference on",
    	doi = "10.1109/ICPP.1999.797399",
    	keywords = "SPLASH-2;distributed shared-memory multiprocessors;execution-driven simulation;execution-driven simulator;hardware shared memory model;incremental expansion capability;interconnection network;irregular topologies;message latency;networks of workstations;p",
    	pages = "146--153",
    	title = "Performance evaluation of networks of workstations with hardware shared memory model using execution-driven simulation",
    	year = 1999
    }
    
  68. B Caminero, F J Quiles, Jose Duato, D S Love and S Yalamanchili. Performance evaluation of the multimedia router with MPEG-2 video traffic. 1999, 62 - 76. BibTeX

    @conference{ 6429570,
    	author = "Caminero, B. and Quiles, F. J. and Duato, Jose and Love, D. S. and Yalamanchili, S.",
    	abstract = "The Multimedia Router (MMR) architecture is aimed at providing QoS to multimedia traffic in a local area environment, while retaining a compact and simple design. In this paper, we show some preliminary performance evaluation results. The workload was composed of a mix of synthetic CBR traffic and semi-synthetic VBR traffic. The latter was obtained from real MPEG-2 video sequences. We show that, with a simple scheduling algorithm, amenable for single-chip implementation, the link bandwidth utilization is quite satisfactory, while still providing acceptable delays to both CBR and VBR traffic",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. Third International Workshop, CANPC'99 Proceedings",
    	keywords = "local area networks;multimedia communication;performance evaluation;",
    	note = "performance evaluation;multimedia router;MPEG-2 video traffic;multimedia traffic;workload;scheduling;",
    	pages = "62--76",
    	title = "Performance evaluation of the multimedia router with {MPEG-2} video traffic",
    	year = 1999
    }
    
  69. Maria E Gomez and Vicente Santonja. Self-similarity in I/O workload: analysis and modeling. 1999, 97 - 104. URL, DOI BibTeX

    @conference{ 6452561,
    	author = "Gomez, Maria E. and Santonja, Vicente",
    	abstract = "Recently, the notion of self-similarity has been applied to wide-area and local-area network traffic. This paper demonstrates that disk-level I/O requests are self-similar in nature. We show evidence, both visual and mathematical, that the I/O accesses are consistent with self-similarity. Moreover, we show that this property of I/O accesses is mainly due to writes. For our experiments, we use two sets of traces that collect the disk activity from two systems over a period of two months. Such behavior has serious implications for the performance evaluation of storage subsystem designs and implementations, since commonly-used simplifying assumptions about workload characteristics (e.g. Poisson arrivals) are shown to be incorrect. Using the ON/OFF model, we implement a disk request generator. The inputs of this generator are the measured properties of the available trace data. We analyze the synthesized workload and confirm that it exhibits the correct self-similar behavior",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Workload Characterization: Methodology and Case Studies. Based on the First Workshop on Workload Characterization",
    	doi = "10.1109/WWC.1998.809365",
    	keywords = "computer networks;disc storage;fractals;performance evaluation;queueing theory;telecommunication traffic;",
    	note = "self-similarity;I/O workload;wide-area network traffic;local-area network traffic;disk-level I/O requests;I/O access;writes;trace data;disk activity collection;performance evaluation;storage subsystem designs;storage subsystem implementations;Poisson arrivals;ON/OFF model;disk request generator;",
    	pages = "97--104",
    	title = "Self-similarity in {I/O} workload: analysis and modeling",
    	url = "http://dx.doi.org/10.1109/WWC.1998.809365",
    	year = 1999
    }
    
  70. Julio Sahuquillo and A Pont. The filter cache: a run-time cache management approach. In EUROMICRO Conference, 1999. Proceedings. 25th 1. 1999, 424 -431. URL, DOI BibTeX

    @conference{ 794504,
    	author = "Sahuquillo, Julio and Pont, A.",
    	abstract = "This work presents a new hardware cache management approach for improving the cache hit ratio and reducing the bus traffic. Increasing the L1 cache hit ratio is a crucial aspect of obtaining good performance with the current processors. The proposed approach also increases the overall (L1 plus L2) cache hit ratio, especially in multiprocessor systems, where the bus latencies are low. This work focuses in multiprocessor systems where a forth kind of miss (the coherence miss) and the bus utilization problem appear; however, the model can also be applied to uniprocessor systems. Our organization increases the overall cache hit ratio and thus reduces the bus utilization. The proposed model introduces two independent L1 caches with different organizations placed in parallel. Each cache block has attached to it a small counter for storing the reuse related information. The proposed microarchitecture not only reduces the bus traffic and speeds up better than the conventional organization, but it also saves die area. The performance (versus conventional cache organizations) increases as the number of processors increases",
    	booktitle = "EUROMICRO Conference, 1999. Proceedings. 25th",
    	doi = "10.1109/EURMIC.1999.794504",
    	isbn = "0-7695-0321-7",
    	keywords = "bus traffic;cache hit ratio;data cache management;data locality;filter cache;hardware cache management;memory architectures;multi-lateral cache;multiprocessor systems;performance;run-time cache management;cache storage;memory architecture;performance evaluation;",
    	pages = "424--431",
    	title = "The filter cache: a run-time cache management approach",
    	url = "http://dx.doi.org/10.1109/EURMIC.1999.794504",
    	volume = 1,
    	year = 1999
    }
    
  71. Julio Sahuquillo and A Pont. The split data cache in multiprocessor systems: an initial hit ratio analysis. In Parallel and Distributed Processing, 1999. PDP '99. Proceedings of the Seventh Euromicro Workshop on. February 1999, 27 -34. URL, DOI BibTeX

    @conference{ 746641,
    	author = "Sahuquillo, Julio and Pont, A.",
    	abstract = "As current first level (L1) data caches are poorly and inefficiently managed, new approaches to achieve better performance in uniprocessor systems have been proposed. The L1 data cache management system is basically the same as it was three decades ago. New organizations have recently been proposed, where two multi-lateral caches are included in the first level in accordance with the data locality where they are stored. The processor simultaneously sends the same memory request to both caches located in L1. These caches work independently and have different organizations. The main objective is to minimize the average data access time. These new organizations will normally increase the hit ratio. Additionally, the chip area occupied by these caches-including the necessary management hardware-is smaller than in a conventional organization. As the proposed cache size is smaller, it can work faster and improve access time at this level. Several authors have studied different approaches around this idea in uniprocessors. In this work we have made extensions for shared memory multiprocessors and studied the advantages",
    	booktitle = "Parallel and Distributed Processing, 1999. PDP '99. Proceedings of the Seventh Euromicro Workshop on",
    	doi = "10.1109/EMPDP.1999.746641",
    	isbn = "0-7695-0059-5",
    	issn = "1066-6192",
    	keywords = "L1 data cache management;data caches;hit ratio analysis;multiprocessor systems;performance;shared memory multiprocessors;split data cache;cache storage;performance evaluation;shared memory systems;",
    	month = feb,
    	pages = "27--34",
    	publisher = "IEEE Computer Society",
    	title = "The split data cache in multiprocessor systems: an initial hit ratio analysis",
    	url = "http://dx.doi.org/10.1109/EMPDP.1999.746641",
    	year = 1999
    }
    
  72. J M Orduna and Jose Duato. A high performance router architecture for multimedia applications. 1998, 142 - 9. URL BibTeX

    @conference{ 5973112,
    	author = "Orduna, J. M. and Duato, Jose",
    	abstract = "Parallel computing systems and network of workstations (NOW) are being used nowadays for on-line multimedia applications. The potential market of such applications seems to be large enough to justify a specific architecture oriented to support them more efficiently. Wave switching is a hybrid switching technique for high performance routers that combines wormhole switching and circuit switching in the same router architecture. This switching technique is very well suited for parallel computers and NOWs using optical interconnections. In this paper we propose the use of wave switching for applications like distributed multimedia systems or MPEG video encoding. These applications generate an intensive, bursty traffic together with a small percentage of control message traffic. For this kind of traffic, wave switching can considerably improve the throughput of parallel computing systems. Performance evaluation results for a MPEG video encoding application show a drastic reduction in latency and an improvement in throughput, making easier for these systems to support real-time constraints",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Fifth International Conference on Massively Parallel Processing (Cat. No.98EX182)",
    	doi = "10.1109/MPPOI.1998.682137",
    	keywords = "distributed memory systems;multimedia communication;multiprocessor interconnection networks;optical interconnections;",
    	note = "network of workstations;on-line multimedia;router architecture;wormhole switching;circuit switching;wave switching;distributed multimedia;MPEG video encoding;parallel computing;",
    	pages = "142--149",
    	title = "A high performance router architecture for multimedia applications",
    	url = "http://dx.doi.org/10.1109/MPPOI.1998.682137",
    	year = 1998
    }
    
  73. R Garcia, Jose Duato and J J Serrano. A new transparent bridge protocol for LAN internetworking using topologies with active loops. 1998, 295 - 303. URL BibTeX

    @conference{ 6034722,
    	author = "Garcia, R. and Duato, Jose and Serrano, J. J.",
    	abstract = "This paper proposes a new transparent bridge protocol for LAN interconnection that considerably improves the performance of current standard IEEE-802.1D bridges. The current standard is based on the Spanning Tree (ST) algorithm and the most important restriction is that it cannot work when the topology has active loops. The new protocol (named OSR for Optimal-Suboptimal Routing) allows them. Therefore, strongly connected regular topologies, like torus, hypercubes, meshes, etc., as well as irregular topologies, can be used without wasting bandwidth. As loops imply alternative paths, the OSR protocol uses optimal routing or in the worst cases, suboptimal routing. The new protocol has been evaluated on highly connected regular topologies, like meshes. The results are compared with those of a network of the same size managed by the standard spanning tree protocol, showing the superior behavior of the OSR protocol",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. 1998 International Conference on Parallel Processing (Cat. No.98EX205)",
    	doi = "10.1109/ICPP.1998.708499",
    	keywords = "hypercube networks;LAN interconnection;performance evaluation;protocols;",
    	note = "transparent bridge protocol;LAN internetworking;active loops topologies;performance;standard IEEE-802.1D bridges;spanning tree algorithm;torus;hypercubes;meshes;highly connected regular topologies;spanning tree protocol;",
    	pages = "295--303",
    	title = "A new transparent bridge protocol for {LAN} internetworking using topologies with active loops",
    	url = "http://dx.doi.org/10.1109/ICPP.1998.708499",
    	year = 1998
    }
    
  74. R Casado, B Caminero, P Cuenca, F Quiles, A Garrido and Jose Duato. A tool for the analysis of reconfiguration and routing algorithms in irregular networks. 1998, 159 - 73. BibTeX

    @conference{ 5959258,
    	author = "Casado, R. and Caminero, B. and Cuenca, P. and Quiles, F. and Garrido, A. and Duato, Jose",
    	abstract = "High performance interconnection networking is one of the most active research fields in the area of communications. Their quick development has been increased by the interest in using multiple workstations in parallel processing. These local networks use ideas that are already successfully applied in parallel computer interconnection networks. However, their more flexible and dynamic environment exposes new problems, such as topology configuration and message routing, which are difficult to solve with the current methods used in regular networks. Therefore, it is advisable to apply tools that help the researcher to develop and verify the behaviour of new algorithms for these new networks. Nowadays, the RAAP group (Redes y Arquitecturas de Altas Prestaciones, High Performance Networks and Architectures) of the University of Castilla-La Mancha is working in this way. In this paper, we present a software tool developed by the RAAP group with the aim of helping in the research. It does not try to simulate the communications within the network (where a long computation process would not be able to guarantee none of its properties) but to analyze its behaviour, through the channel dependency graph. The result is an agile and practical tool that provides conclusions in a quick and reliable way",
    	address = "Berlin, Germany",
    	booktitle = "Network-Based Parallel Computing. Communication, Architecture, and Applications. Second International Workshop, CANPC '98 Proceedings",
    	keywords = "local area networks;parallel processing;reconfigurable architectures;software tools;workstations;",
    	note = "routing algorithms;reconfiguration algorithms;irregular networks;high performance interconnection networking;multiple workstations;local networks;parallel computer interconnection networks;dynamic environment;topology configuration;message routing;software tool;computation process;channel dependency graph;",
    	pages = "159--173",
    	title = "A tool for the analysis of reconfiguration and routing algorithms in irregular networks",
    	year = 1998
    }
    
  75. Pedro Lopez, J M Martinez and Jose Duato. A very efficient distributed deadlock detection mechanism for wormhole networks. 1998, 57 - 66. URL BibTeX

    @conference{ 5842955,
    	author = "Lopez, Pedro and Martinez, J. M. and Duato, Jose",
    	abstract = "Networks using wormhole switching have traditionally relied upon deadlock avoidance strategies for the design of routing algorithms. More recently, deadlock recovery strategies have begun to gain acceptance. Progressive deadlock recovery techniques are very attractive because they allocate a few dedicated resources to quickly deliver deadlocked messages, instead of killing them. However, the distributed deadlock detection techniques proposed up to now detect many false deadlocks, especially when the network is heavily loaded and messages have different lengths. As a consequence, messages detected as deadlocked may saturate the bandwidth offered by recovery resources, thus degrading performance considerably. In this paper we propose an improved distributed deadlock detection mechanism that uses only local information, detects all the deadlocks, considerably reduces the probability of false deadlock detection and is not strongly affected by variations in message length and message destination distribution",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 1998 Fourth International Symposium on High-Performance Computer Architecture (Cat. No.98TB100224)",
    	doi = "10.1109/HPCA.1998.650546",
    	keywords = "multiprocessor interconnection networks;performance evaluation;system recovery;",
    	note = "distributed deadlock detection mechanism;wormhole networks;wormhole switching;deadlock avoidance strategies;routing algorithms;deadlock recovery strategies;deadlock recovery techniques;performance degradation;local information;false deadlock detection;message length;message destination distribution;",
    	pages = "57--66",
    	title = "A very efficient distributed deadlock detection mechanism for wormhole networks",
    	url = "http://dx.doi.org/10.1109/HPCA.1998.650546",
    	year = 1998
    }
    
  76. Pedro Lopez, Rosa Alcover, Jose Duato and L Zunica. Cost-effective methodology for the evaluation of interconnection networks. Journal of Systems Architecture 44(9-10):815 - 830, 1998. URL BibTeX

    @article{ 1998384306573,
    	author = "Lopez, Pedro and Alcover, Rosa and Duato, Jose and Zunica, L.",
    	abstract = "Interconnection network performance depends on several parameters, including network design parameters, network size, message traffic and message length. Simulation is the methodology usually followed in evaluation studies, because the model can more faithfully represent hardware implementation, taking into account more details. Nevertheless, the number of parameter combinations is often very high, and simulations also take long to complete. Therefore, evaluation studies must choose a subset of the parameters and restrict the variability of each of them. In this paper, we propose a methodology for evaluating interconnection networks. It is based on experimental design used in statistical studies. Using this methodology, we can study network behavior considering many parameters, running only a subset of the simulations required to study all the combinations. In addition, the methodology permits to quantify the effect of interactions among the parameters. We apply this methodology to adjust node design parameters such as number of virtual channels, input buffer size, and output buffer size for a 8-ary 3-cube with adaptive (both partially and fully) wormhole routing. We show that running only one third of the simulations required to study all the combinations, the most significant effects can be estimated without a noticeable loss in precision.",
    	address = "Amsterdam, Netherlands",
    	doi = "10.1016/S1383-7621(97)00019-2",
    	issn = "1383-7621",
    	journal = "Journal of Systems Architecture",
    	key = "Interconnection networks",
    	keywords = "Buffer storage;Communication channels (information theory);Computer simulation;Cost effectiveness;Data communication systems;Statistical methods;Telecommunication traffic;",
    	note = "Adaptive routing;Virtual channels;Wormhole routing;",
    	number = "9-10",
    	pages = "815--830",
    	title = "Cost-effective methodology for the evaluation of interconnection networks",
    	url = "http://dx.doi.org/10.1016/S1383-7621(97)00019-2",
    	volume = 44,
    	year = 1998
    }
    
  77. Jose Duato. Deadlock avoidance and adaptive routing in interconnection networks. 1998, 359 - 64. URL BibTeX

    @conference{ 5842933,
    	author = "Duato, Jose",
    	abstract = "Networks of workstations are rapidly emerging as a cost effective alternative to parallel computers. Switch based interconnects with irregular topologies allow the wiring flexibility, scalability and incremental expansion capability required in this environment. The irregularity also makes routing and deadlock avoidance on such systems quite complicated. Current proposals avoid deadlock by removing cyclic dependencies between channels. As a consequence, many messages are routed following non minimal paths, therefore increasing latency and wasting resources. We describe a methodology for the design of adaptive routing algorithms for networks with irregular topology. The resulting algorithms allow messages to follow minimal paths in most cases, reducing message latency and balancing channel utilization. The proposed routing algorithms can be implemented simply by changing the routing tables and adding some links in parallel with existing links, taking advantage of spare switch ports. Alternatively, routing algorithms can be implemented by designing new switches that support virtual channels. Evaluation results show that the new routing algorithms are able to increase throughput by a factor of more than four for random traffic, also reducing latency",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Sixth Euromicro Workshop on Parallel and Distributed Processing - PDP'98 - (Cat. No.98EX134)",
    	doi = "10.1109/EMPDP.1998.647220",
    	keywords = "concurrency control;message passing;multiprocessor interconnection networks;workstations;",
    	note = "deadlock avoidance;adaptive routing;interconnection networks;networks of workstations;switch based interconnects;irregular topologies;wiring flexibility;incremental expansion capability;cyclic dependencies;message routing;irregular topology;message latency;channel utilization;spare switch ports;virtual channels;random traffic;",
    	pages = "359--364",
    	title = "Deadlock avoidance and adaptive routing in interconnection networks",
    	url = "http://dx.doi.org/10.1109/EMPDP.1998.647220",
    	year = 1998
    }
    
  78. Pedro Lopez, Juan Miguel Martínez and Jose Duato. DRIL: dynamically reduced message injection limitation mechanism for wormhole networks. In Parallel Processing, 1998. Proceedings. 1998 International Conference on. August 1998, 535 -542. URL, DOI BibTeX

    @conference{ 708527,
    	author = "Lopez, Pedro and Mart{\'i}nez, Juan Miguel and Duato, Jose",
    	abstract = "Deadlock avoidance and recovery techniques are alternatives to deal with the interconnection network deadlock problem. Both techniques allow fully adaptive routing on some set of resources while providing dedicated resources to escape from deadlock. They mainly differ in the way they supply escape paths and when those paths are used. As the escape paths only provide limited bandwidth to escape from deadlocks, both techniques suffer from severe performance degradation when the network is close to saturation. On the other hand, deadlock recovery is based on the assumption that deadlocks are rare. Several studies show that deadlock are more prone when the network is close to or beyond saturation. In this paper we propose a new mechanism that prevents network saturation by dynamically adjusting message injection limitation into the network. As a consequence, this mechanism will avoid the performance degradation problem that typically occurs in both deadlock avoidance and recovery techniques, making fully adaptive feasible. Also, it will guarantee that the frequency of deadlock is really negligible, allowing the use of simple low-cost recovery strategies",
    	booktitle = "Parallel Processing, 1998. Proceedings. 1998 International Conference on",
    	doi = "10.1109/ICPP.1998.708527",
    	isbn = "0-8186-8650-2",
    	issn = "0190-3918",
    	keywords = "DRIL;deadlock avoidance;interconnection network deadlock;message injection limitation;network saturation;performance degradation;recovery techniques;wormhole networks;concurrency control;multiprocessor interconnection networks;performance evaluation;system recovery;",
    	month = aug,
    	pages = "535--542",
    	title = "{DRIL}: dynamically reduced message injection limitation mechanism for wormhole networks",
    	url = "http://dx.doi.org/10.1109/ICPP.1998.708527",
    	year = 1998
    }
    
  79. Pedro Lopez, J M Martinez and Jose Duato. DRIL: dynamically reduced message injection limitation mechanism for wormhole networks. 1998, 535 - 42. URL BibTeX

    @conference{ 6034749,
    	author = "Lopez, Pedro and Martinez, J. M. and Duato, Jose",
    	abstract = "Deadlock avoidance and recovery techniques are alternatives to deal with the interconnection network deadlock problem. Both techniques allow fully adaptive routing on some set of resources while providing dedicated resources to escape from deadlock. They mainly differ in the way they supply escape paths and when those paths are used. As the escape paths only provide limited bandwidth to escape from deadlocks, both techniques suffer from severe performance degradation when the network is close to saturation. On the other hand, deadlock recovery is based on the assumption that deadlocks are rare. Several studies show that deadlock are more prone when the network is close to or beyond saturation. In this paper we propose a new mechanism that prevents network saturation by dynamically adjusting message injection limitation into the network. As a consequence, this mechanism will avoid the performance degradation problem that typically occurs in both deadlock avoidance and recovery techniques, making fully adaptive feasible. Also, it will guarantee that the frequency of deadlock is really negligible, allowing the use of simple low-cost recovery strategies",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. 1998 International Conference on Parallel Processing (Cat. No.98EX205)",
    	doi = "10.1109/ICPP.1998.708527",
    	keywords = "concurrency control;multiprocessor interconnection networks;performance evaluation;system recovery;",
    	note = "DRIL;wormhole networks;interconnection network deadlock;network saturation;message injection limitation;performance degradation;deadlock avoidance;recovery techniques;",
    	pages = "535--542",
    	title = "{DRIL}: dynamically reduced message injection limitation mechanism for wormhole networks",
    	url = "http://dx.doi.org/10.1109/ICPP.1998.708527",
    	year = 1998
    }
    
  80. Jose Flich, Pedro Lopez, M P Malumbres and Jose Duato. Edinet: an execution driven interconnection network simulator for DSM systems. 1998, 336 - 9. BibTeX

    @conference{ 6161583,
    	author = "Flich, Jose and Lopez, Pedro and Malumbres, M. P. and Duato, Jose",
    	abstract = "Evaluation studies on interconnection networks for distributed memory multiprocessors usually assume synthetic or trace-driven workloads. However, when the final design choices must be done a more precise evaluation study should be performed. In this paper, we describe a new execution-driven simulation tool to evaluate interconnection networks for distributed memory multiprocessors using real application workloads. As an example, we have developed a NCC-NUMA memory model and obtained some simulation results from the SPLASH-2 suite, using different network routing algorithms",
    	address = "Berlin, Germany",
    	booktitle = "Computer Performance Evaluation. Modelling Techniques and Tools. 10th International Conference, Tools'98. Proceedings",
    	keywords = "discrete event simulation;distributed shared memory systems;multiprocessor interconnection networks;performance evaluation;",
    	note = "Edinet;execution driven interconnection network simulator;distributed memory multiprocessors;trace-driven workloads;execution-driven simulation tool;NCC-NUMA memory model;simulation results;SPLASH-2 suite;network routing algorithms;",
    	pages = "336--339",
    	title = "{Edinet}: an execution driven interconnection network simulator for {DSM} systems",
    	year = 1998
    }
    
  81. Federico Silla, M P Malumbres, Jose Duato, D Dai and D K Panda. Impact of adaptivity on the behavior of networks of workstations under bursty traffic. 1998, 88 - 95. URL BibTeX

    @conference{ 6034698,
    	author = "Silla, Federico and Malumbres, M. P. and Duato, Jose and Dai, D. and Panda, D. K.",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as an alternative to parallel computers. Typically, these networks present irregular topologies, providing the wiring flexibility, scalability, and incremental expansion capability required in this environment. Similar to the evolution of parallel computers, NOWs are also evolving from distributed memory to shared memory. However distances between processors are longer in NOWs, leading to higher message latency and lower network bandwidth. Therefore, one can expect the network to be a bottleneck when executing some parallel applications on a NOW supporting a shared-memory programming paradigm. The authors analyze whether the interconnection network in a NOW is able to efficiently handle the traffic generated in a DSM with the same number of processors. They evaluate the behavior of a NOW using application traces captured during the execution of several SPLASH2 applications on a DSM simulator. They show through simulation that the adaptive routing algorithm previously proposed by them almost eliminates network saturation due to its ability to support a higher sustained throughput. Therefore, adaptive routing becomes a key design issue to achieve similar performance in NOWs and tightly-coupled DSMs",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. 1998 International Conference on Parallel Processing (Cat. No.98EX205)",
    	keywords = "distributed memory systems;local area networks;parallel processing;shared memory systems;telecommunication network routing;telecommunication traffic;virtual machines;workstations;",
    	note = "workstation network behaviour;bursty traffic;adaptivity;irregular topologies;wiring flexibility;wiring scalability;incremental expansion capability;distributed memory;shared memory;message latency;network bandwidth;parallel applications;shared-memory programming paradigm;interconnection network;traffic handling;application traces;SPLASH2 applications;simulator;adaptive routing algorithm;network saturation;",
    	pages = "88--95",
    	title = "Impact of adaptivity on the behavior of networks of workstations under bursty traffic",
    	url = "http://dx.doi.org/10.1109/ICPP.1998.708467",
    	year = 1998
    }
    
  82. Julio Sahuquillo and A Pont. Impact of reducing miss write latencies in multiprocessors with two level cache. In Euromicro Conference, 1998. Proceedings. 24th 1. August 1998, 333 -336. URL, DOI BibTeX

    % NOTE(review): the keywords value is cut off mid-word ("write invalidate sn") in the exported data; left as-is, restore from the publisher record if needed.
    @conference{ 711822,
    	author = "Sahuquillo, Julio and Pont, A.",
    	abstract = "In this paper a multiprocessor system with a two-level cache hierarchy is modeled and extensions of two write invalidate snoopy protocols are implemented in the L2 cache controller for coherence maintenance. The paper focuses on the use of different techniques for reducing miss penalty and a comparative performance study is done for each possibility. To solve efficiently a miss read, the early restart technique is implemented in the second level of cache hierarchy and the critical word first technique is used in the first level cache controller. To obtain better performance in the case of a write miss the write allocate technique is implemented at the L2 cache controller. Two models, with different L1 cache controllers are considered in our study, one of them using the non-write allocate technique and the other using the write allocate. We show that the write allocate and non-write allocate techniques are independent over the processors number. The major conclusion of this work is that the non-write allocate technique is not only less complex for implementation but also better in performance if the L1 write miss rate represents a high percentage of L1 miss rate",
    	booktitle = "Euromicro Conference, 1998. Proceedings. 24th",
    	doi = "10.1109/EURMIC.1998.711822",
    	keywords = "L2 cache controller;coherence maintenance;critical word first technique;early restart technique;miss write latencies;multiprocessor system;nonwrite allocate technique;performance study;two-level cache hierarchy;write allocate technique;write invalidate sn",
    	month = aug,
    	pages = "333--336",
    	title = "Impact of reducing miss write latencies in multiprocessors with two level cache",
    	url = "http://dx.doi.org/10.1109/EURMIC.1998.711822",
    	volume = 1,
    	year = 1998
    }
    
  83. Federico Silla, Antonio Robles and Jose Duato. Improving performance of networks of workstations by using Disha Concurrent. In TH Lai (ed.). 1998 INTERNATIONAL CONFERENCE ON PARALLEL PROCESSING - PROCEEDINGS. 1998, 80-87. BibTeX

    % NOTE(review): same paper as entry 6034697 (different database export); cite only one of the two keys.
    @conference{ isi:000075698400010,
    	author = "Silla, Federico and Robles, Antonio and Duato, Jose",
    	abstract = "Networks of workstations are currently emerging as a cost-effective alternative to parallel computers. Recently, deadlock recovery techniques have been shown to be an alternative to deadlock avoidance. Disha Concurrent is a progressive deadlock recovery scheme able to simultaneously redirect several deadlocked messages through a deadlock-free lane. Unlike deadlock avoidance techniques, Disha provides true fully adaptive routing without using virtual channels to guarantee deadlock freedom. In this paper, we analyze the application of Disha to networks of workstations. We propose an implementation of Disha on irregular networks that allows concurrent deadlock recovery, proving that this implementation is always able to recover from deadlock. A new switch organization and a new flow control protocol are proposed to support Disha. Performance evaluation results show that applying Disha to irregular networks increases network throughput by a factor of up to 3.5, and also reduces latency with regard to other routing algorithms based on deadlock avoidance techniques.",
    	booktitle = "1998 INTERNATIONAL CONFERENCE ON PARALLEL PROCESSING - PROCEEDINGS",
    	editor = "Lai, T. H.",
    	isbn = "0818686510",
    	issn = "0190-3918",
    	note = "International Conference on Parallel Processing (ICPP), MINNEAPOLIS, MN, AUG 10-14, 1998",
    	pages = "80--87",
    	series = "PROCEEDINGS OF THE INTERNATIONAL CONFERENCE ON PARALLEL PROCESSING",
    	title = "Improving performance of networks of workstations by using {Disha} {Concurrent}",
    	year = 1998
    }
    
  84. Federico Silla, Antonio Robles and Jose Duato. Improving performance of networks of workstations by using Disha Concurrent. 1998, 80 - 7. URL BibTeX

    % NOTE(review): same paper as entry isi:000075698400010 (different database export); cite only one of the two keys.
    @conference{ 6034697,
    	author = "Silla, Federico and Robles, Antonio and Duato, Jose",
    	abstract = "Networks of workstations are currently emerging as a cost-effective alternative to parallel computers. Recently, deadlock recovery techniques have been shown to be an alternative to deadlock avoidance. Disha Concurrent is a progressive deadlock recovery scheme able to simultaneously redirect several deadlocked messages through a deadlock-free lane. Unlike deadlock avoidance techniques, Disha provides true fully adaptive routing without using virtual channels to guarantee deadlock freedom. In this paper, we analyze the application of Disha to networks of workstations. We propose an implementation of Disha on irregular networks that allows concurrent deadlock recovery proving that this implementation is always able to recover from deadlock. A new switch organization and a new flow control protocol are proposed to support Disha. Performance evaluation results show that applying Disha to irregular networks increases network throughput by a factor of up to 3.5, and also reduces latency with regard to other routing algorithms based on deadlock avoidance techniques",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. 1998 International Conference on Parallel Processing (Cat. No.98EX205)",
    	keywords = "concurrency control;local area networks;parallel processing;performance evaluation;system recovery;workstations;",
    	note = "performance improvement;networks of workstations;Disha Concurrent;deadlock recovery techniques;deadlock avoidance;flow control protocol;latency;",
    	pages = "80--87",
    	title = "Improving performance of networks of workstations by using {Disha} {Concurrent}",
    	url = "http://dx.doi.org/10.1109/ICPP.1998.708466",
    	year = 1998
    }
    
  85. Vicente Santonja, Xavier Molero, Marina Alonso, J J Serrano and P Gil. Influence of on-line spare disks and duplicated controllers on RAID dependability. In Dependable Computing for Critical Applications 6. 1998, 249 - 70. BibTeX

    @conference{ 6369403,
    	author = "Santonja, Vicente and Molero, Xavier and Alonso, Marina and Serrano, J. J. and Gil, P.",
    	abstract = "This paper shows how the use of stochastic activity networks (SAN) can facilitate the construction of dependability models through the flexibility in their definition and their approach to hierarchical design. A basic model of the RAID level 5 dependability is initially designed. Then, this basic model is extended to include a pool of on-line spare disks. The influence of the number of spares and the replacement policy is then analyzed. However RAIDs consist of disk drives and other components (controllers, cabling, power supplies, etc.) that can also fail. In our models, failures of these support hardware components are considered and benefits for reliability obtained by making these components redundant are studied. Two organizations with duplicated controllers are also compared",
    	booktitle = "Dependable Computing for Critical Applications 6",
    	isbn = 9780818680090,
    	keywords = "fault tolerant computing;RAID;redundancy;",
    	note = "on-line spare disks;duplicated controllers;RAID dependability;stochastic activity networks;replacement policy;reliability;",
    	pages = "249--270",
    	publisher = "IEEE Computer Society",
    	series = "Dependable Computing and Fault-Tolerant Systems",
    	title = "Influence of on-line spare disks and duplicated controllers on {RAID} dependability",
    	volume = 11,
    	year = 1998
    }
    
  86. J M Orduna and Jose Duato. On the design of network routers for multimedia applications. 1998, 13 - 20. URL BibTeX

    @conference{ 6076091,
    	author = "Orduna, J. M. and Duato, Jose",
    	abstract = "Parallel computing systems based on high performance interconnection networks are being used nowadays for online multimedia applications. The potential market of such applications seems to be large enough to justify a specific architecture oriented to support them. Wave switching is a hybrid switching technique for high performance routers which combines wormhole switching and circuit switching in the same router architecture. We propose the use of wave switching in parallel computing systems for applications like distributed multimedia systems or multicomputer based databases. These applications generate an intensive, bursty traffic together with a small percentage of control message traffic. For this kind of traffic, wave switching can considerably improve the throughput of a parallel computing system. Performance evaluation results show a drastic reduction in latency and an improvement in throughput with regard to networks with the same channel width using wormhole switching",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the 1998 ICPP Workshop on Architectural and OS Support for Multimedia Applications Flexible Communication Systems. Wireless Networks and Mobile Computing (Cat. No.98EX206)",
    	keywords = "circuit switching;multimedia systems;parallel processing;telecommunication congestion control;",
    	note = "network router design;multimedia applications;parallel computing systems;high performance interconnection networks;online multimedia applications;wave switching;hybrid switching technique;high performance routers;wormhole switching;circuit switching;router architecture;distributed multimedia systems;multicomputer based databases;bursty traffic;control message traffic;parallel computing system;channel width;",
    	pages = "13--20",
    	title = "On the design of network routers for multimedia applications",
    	url = "http://dx.doi.org/10.1109/ICPPW.1998.721869",
    	year = 1998
    }
    
  87. Pedro Lopez, J M Martinez, Jose Duato and F Petrini. On the reduction of deadlock frequency by limiting message injection in wormhole networks. 1998, 295 - 307. BibTeX

    @conference{ 5992388,
    	author = "Lopez, Pedro and Martinez, J. M. and Duato, Jose and Petrini, F.",
    	abstract = "Recently, deadlock recovery strategies have begun to gain acceptance in networks using wormhole switching. In particular, progressive deadlock recovery techniques are very attractive because they allocate a few dedicated resources to quickly deliver deadlocked packets, instead of killing them. Deadlock recovery is based on the assumption that deadlocks are really rare. Otherwise, recovery techniques are not efficient. We propose the use of a message injection limitation mechanism that reduces the probability of deadlock to negligible values, even when fully adaptive routing is used. The main new feature is that it can be used with different message destination distributions. The proposed mechanism can be combined with any deadlock detection mechanism. In particular, we use the deadlock detection mechanism proposed in Martinez (1997). In addition, the proposed injection limitation mechanism considerably reduces performance degradation when the network reaches the saturation point",
    	address = "Berlin, Germany",
    	booktitle = "Parallel Computer Routing and Communication. Second International Workshop, PCRCW'97. Proceedings",
    	keywords = "multiprocessor interconnection networks;network routing;packet switching;performance evaluation;probability;resource allocation;system recovery;",
    	note = "deadlock frequency;wormhole switching;progressive deadlock recovery;resource allocation;deadlocked packets;message injection limitation;probability;fully adaptive routing;message destination distributions;network performance;",
    	pages = "295--307",
    	title = "On the reduction of deadlock frequency by limiting message injection in wormhole networks",
    	year = 1998
    }
    
  88. Federico Silla and Jose Duato. On the use of virtual channels in networks of workstations with irregular topology. 1998, 203 - 16. BibTeX

    @conference{ 5992382,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect processors using irregular topologies, providing the wiring flexibility, scalability and incremental expansion capability required in this environment. Recently, we proposed a design methodology as well as fully adaptive routing algorithms for irregular topologies. These algorithms increase throughput considerably with respect to previously existing ones but require the use of virtual channels. In this paper we propose a very efficient flow control mechanism to support virtual channels when link wires are very long, and/or have different lengths. This flow control mechanism relies on the use of channel pipelining and control flits. Control traffic is minimized by assigning physical bandwidth to virtual channels until the corresponding message blocks or it is completely transmitted. Simulations show that the resulting flow control protocol performs almost as efficiently as an ideal network with short wires and flit-by-flit multiplexing",
    	address = "Berlin, Germany",
    	booktitle = "Parallel Computer Routing and Communication. Second International Workshop, PCRCW'97. Proceedings",
    	keywords = "message passing;multiprocessor interconnection networks;network topology;pipeline processing;resource allocation;shared memory systems;",
    	note = "virtual channels;networks of workstations;irregular topology;processor interconnection;flow control mechanism;link wires;channel pipelining;control flits;traffic minimization;physical bandwidth assignment;message transmission;simulations;adaptive routing;",
    	pages = "203--216",
    	title = "On the use of virtual channels in networks of workstations with irregular topology",
    	year = 1998
    }
    
  89. S Yalamanchili and J Duato (eds.). Parallel Computer Routing and Communication. Second International Workshop, PCRCW'97. Proceedings. 1998, xii+307. BibTeX

    @proceedings{ 5966521,
    	editor = "Yalamanchili, S. and Duato, J.",
    	abstract = "The following topics were dealt with: routing; router and network architecture; messaging layer support; and deadlock issues",
    	address = "Berlin, Germany",
    	keywords = "computer networks;multiprocessor interconnection networks;parallel architectures;",
    	note = "routing;router;network architecture;messaging layer support;deadlock;parallel computer routing;parallel computer communication;",
    	pages = "xii+307",
    	title = "{Parallel} {Computer} {Routing} and {Communication}. {Second} {International} {Workshop}, {PCRCW}'97. {Proceedings}",
    	year = 1998
    }
    
  90. R Garcia and Jose Duato. Suboptimal-optimal routing for LAN internetworking using transparent bridges. International Journal of Foundations of Computer Science 9(2):139 - 56, 1998. URL BibTeX

    @article{ 5996619,
    	author = "Garcia, R. and Duato, Jose",
    	abstract = "The current standard transparent bridge protocol IEEE-802.1D is based on the Spanning Tree (ST) algorithm. It has a very important restriction: it cannot work when the topology has active loops. Therefore, a tree is the only possible interconnection topology that can be used. The ST algorithm guarantees that the active topology is a tree discarding lines that form loops. However, because of this, network bandwidth cannot be fully utilized. Moreover, trees have a very serious bottleneck near the root. This paper proposes a new transparent bridge protocol for LAN interconnection that allows active loops. Therefore, strongly connected regular topologies like tori, hypercubes, meshes, etc., as well as irregular topologies can be used without wasting bandwidth. As loops provide alternative paths, the new protocol (named OSR for Optimal-Suboptimal Routing) uses optimal routing or, in the worst case, suboptimal routing",
    	address = "Singapore",
    	issn = "0129-0541",
    	journal = "International Journal of Foundations of Computer Science",
    	keywords = "hypercube networks;LAN interconnection;telecommunication network routing;transport protocols;",
    	note = "suboptimal-optimal routing;LAN internetworking;transparent bridges;transparent bridge protocol IEEE-802.1D;spanning tree algorithm;interconnection topology;strongly connected regular topologies;tori;hypercubes;meshes;suboptimal routing;",
    	number = 2,
    	pages = "139--156",
    	title = "Suboptimal-optimal routing for {LAN} internetworking using transparent bridges",
    	url = "http://dx.doi.org/10.1142/S0129054198000118",
    	volume = 9,
    	year = 1998
    }
    
  91. J L Sanchez, Jose Duato and J M Garcia. Using channel pipelining in reconfigurable interconnection networks. 1998, 120 - 6. URL BibTeX

    @conference{ 5836358,
    	author = "Sanchez, J. L. and Duato, Jose and Garcia, J. M.",
    	abstract = "The major problem in wormhole routing networks is related with the contention due to message blocking. Reconfigurable networks are an alternative to reduce the negative effect that congestion produces on the performance of the network. Our work is focused on dynamic reconfiguration. This technique consists basically of placing the different processors in the network in those positions which, at each computational moment and according to the existing communication pattern among them, are more adequate for the development of such computation. In a reconfigurable architecture, the clock period is determined by the transmission time across the switch. To increase this frequency the channel pipelined technique is used. In this paper we present the foundations of reconfigurable network architecture. We show the general structure of the reconfigurable systems and we indicate the characteristics of the channel pipelining technique. Finally, we evaluate the performance of a reconfigurable system",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Sixth Euromicro Workshop on Parallel and Distributed Processing - PDP'98 - (Cat. No.98EX134)",
    	keywords = "multiprocessor interconnection networks;performance evaluation;reconfigurable architectures;telecommunication network routing;",
    	note = "channel pipelining;reconfigurable interconnection networks;wormhole routing networks;message blocking;performance;dynamic reconfiguration;communication pattern;reconfigurable architecture;transmission time;",
    	pages = "120--126",
    	title = "Using channel pipelining in reconfigurable interconnection networks",
    	url = "http://dx.doi.org/10.1109/EMPDP.1998.647188",
    	year = 1998
    }
    
  92. Pedro Lopez, Juan Miguel Martínez and Jose Duato. Very efficient distributed deadlock detection mechanism for wormhole networks. 1998, 57 - 66. BibTeX

    @conference{ 1998534159795,
    	author = "Lopez, Pedro and Mart{\'\i}nez, Juan Miguel and Duato, Jose",
    	abstract = "Networks using wormhole switching have traditionally relied upon deadlock avoidance strategies for the design of routing algorithms. More recently, deadlock recovery strategies have begun to gain acceptance. Progressive deadlock recovery techniques are very attractive because they allocate a few dedicated resources to quickly deliver deadlocked messages, instead of killing them. However, the distributed deadlock detection techniques proposed up to now detect many false deadlocks, especially when the network is heavily loaded and messages have different lengths. As a consequence, messages detected as deadlocked may saturate the bandwidth offered by recovery resources, thus degrading performance considerably. In this paper we propose an improved distributed deadlock detection mechanism that uses only local information, detects all the deadlocks, considerably reduces the probability of false deadlock detection and is not strongly affected by variations in message length and message destination distribution.",
    	address = "Las Vegas, NV, USA",
    	booktitle = "IEEE High-Performance Computer Architecture Symposium Proceedings",
    	key = "Computer system recovery",
    	keywords = "Algorithms;Bandwidth;Computer networks;Distributed computer systems;Error detection;",
    	note = "Distributed deadlock detection mechanisms;Wormhole networks;",
    	pages = "57--66",
    	title = "Very efficient distributed deadlock detection mechanism for wormhole networks",
    	year = 1998
    }
    
  93. Federico Silla, Jose Duato, A Sivasubramaniam and C R Das. Virtual channel multiplexing in networks of workstations with irregular topology. 1998, 147 - 54. URL BibTeX

    @conference{ 6129280,
    	author = "Silla, Federico and Duato, Jose and Sivasubramaniam, A. and Das, C. R.",
    	abstract = "Networks of workstations are becoming a cost-effective alternative for small-scale parallel computing. Although they may not provide the closely coupled environment of multicomputers and multiprocessors, they meet the needs of a great variety of parallel computing problems at a lower cost. However in order to achieve a high efficiency, the interconnects used to build the network of workstations must provide a very high bandwidth and low latencies, making their design a critical issue. Recently, a very efficient flow control protocol for networks of workstations has been proposed by the authors. This protocol multiplexes physical channels between several virtual channels and minimizes the use of control flits by transmitting several data flits each time a virtual channel gets the link. In this protocol, a virtual channel sends data flits until the message blocks or is completely transmitted. However it can reduce network throughput, by increasing short message latency, due to long messages monopolizing channels and hindering the progress of short messages. In this paper, we analyze the impact of limiting the number of flits (block size) that a virtual channel can send once it gets the link. We propose a new version of the previous flow control protocol that is easily implementable on hardware. Simulation results show that limiting the maximum block size is not a good design decision, because the overall network performance decreases. Only when short message latency is crucial is it acceptable to limit the block size",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Fifth International Conference on High Performance Computing (Cat. No. 98EX238)",
    	keywords = "multiplexing;parallel processing;performance evaluation;protocols;workstation clusters;",
    	note = "virtual channel multiplexing;workstation networks;irregular topology;small-scale parallel computing;high efficiency;interconnects;high bandwidth;low latency;flow control protocol;physical channels;minimized control flit use;data flit transmission;network throughput;simulation;network performance;short message latency;",
    	pages = "147--154",
    	title = "Virtual channel multiplexing in networks of workstations with irregular topology",
    	url = "http://dx.doi.org/10.1109/HIPC.1998.737983",
    	year = 1998
    }
    
  94. Rosa Alcover, Pedro Lopez, Jose Duato and L Zunica. A methodology for optimal interconnection network design. 1997, 81 - 4. BibTeX

    @conference{ 5863027,
    	author = "Alcover, Rosa and Lopez, Pedro and Duato, Jose and Zunica, L.",
    	abstract = "Interconnection network performance depends on several parameters. Some of them can be chosen by the designer: routing algorithm, switching technique, topology and node design parameters. However, there are other parameters that cannot be selected by the designer. Among these, there are parameters that depend on the application, such as message size, message destination distribution and message traffic, as well as parameters defined by the customer, such as network size. The optimization criteria that the network designer should follow is not only maximizing performance, but also selecting the design parameters that achieve a good performance under all the feasible combinations of the parameters that cannot be selected by the designer. We propose a methodology for optimal network design based on robust experimental design techniques used in statistics. As an application, we choose the most important design parameters of a k-ary n-cube network based on that methodology",
    	address = "Raleigh, NC, USA",
    	booktitle = "Proceedings of the ISCA 10th International Conference on Parallel and Distributed Computing Systems",
    	keywords = "design of experiments;message passing;multiprocessor interconnection networks;optimisation;parallel architectures;performance evaluation;",
    	note = "optimal interconnection network design;interconnection network performance;routing algorithm;switching technique;topology;node design parameters;message size;message destination distribution;message traffic;network size;optimization criteria;experimental design techniques;statistics;k-ary n-cube network;",
    	pages = "81--84",
    	title = "A methodology for optimal interconnection network design",
    	year = 1997
    }
    
  95. B Vien Dao, S Yalamanchili and Jose Duato. Architectural support for reducing communication overhead in multiprocessor interconnection networks. 1997, 343 - 52. URL BibTeX

    % NOTE(review): same paper as entry 1997173562519 (different database export); cite only one of the two keys.
    @conference{ 5514285,
    	author = "Dao, B. Vien and Yalamanchili, S. and Duato, Jose",
    	abstract = "Modern multicomputer interconnection networks offer the delivery of messages with very low latency. However the message in-flight time is only a small portion of the total time that is required to send a message from source to destination. For fine to medium grained message sizes, the majority of time is spent in overheads for setting up and managing message transmission. It is often possible for compilers/programmers to separate inter-processor communication traffic into messages that exhibit communication locality and messages that do not. This paper proposes architectural modifications to network interfaces and routers to enable compilers/programmers to exploit known locality properties of programs in reducing the fixed overhead of transmission. These techniques work well on traffic exhibiting communication locality without unduly penalizing ``ordinary'' message traffic. The proposed techniques are evaluated using communication traces from 5 application program kernels. Significant reductions in average message latency are possible, and we argue that the approach can be used in the next generation of cluster interconnects",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Third International Symposium on High-Performance Computer Architecture (Cat. No.97TB100094)",
    	keywords = "message passing;multiprocessor interconnection networks;network interfaces;",
    	note = "architectural support;communication overhead reduction;multiprocessor interconnection networks;message in-flight time;inter-processor communication traffic;communication locality;network interfaces;routers;communication traces;application program kernels;cluster interconnects;",
    	pages = "343--352",
    	title = "Architectural support for reducing communication overhead in multiprocessor interconnection networks",
    	url = "http://dx.doi.org/10.1109/HPCA.1997.569699",
    	year = 1997
    }
    
  96. Binh Vien Dao, Sudhakar Yalamanchili and Jose Duato. Architectural support for reducing communication overhead in multiprocessor interconnection networks. 1997, 343 - 352. BibTeX

    % NOTE(review): same paper as entry 5514285 (different database export); cite only one of the two keys.
    @conference{ 1997173562519,
    	author = "Dao, Binh Vien and Yalamanchili, Sudhakar and Duato, Jose",
    	abstract = "Modern multicomputer interconnection networks offer the delivery of messages with very low latency. However, the message in-flight time is only a small portion of the total time that is required to send a message from source to destination. For fine to medium grained message sizes, the majority of time is spent in overheads for setting up and managing message transmission. It is often possible for compilers/programmers to separate inter-processor communication traffic into messages that exhibit communication locality, and messages that do not. This paper proposes architectural modifications to network interfaces and routers to enable compilers/programmers to exploit known locality properties of programs in reducing the fixed overhead of transmission. These techniques work well on traffic exhibiting communication locality without unduly penalizing `ordinary' message traffic. The proposed techniques are evaluated using communication traces from 5 application program kernels. Significant reductions in average message latency are possible, and we argue that the approach can be used in the next generation of cluster interconnects.",
    	address = "San Antonio, TX, USA",
    	booktitle = "IEEE High-Performance Computer Architecture Symposium Proceedings",
    	key = "Interconnection networks",
    	keywords = "Buffer storage;Computer architecture;Data communication systems;Interfaces;Pipeline processing systems;Program compilers;Telecommunication traffic;",
    	note = "Communication locality;",
    	pages = "343--352",
    	title = "Architectural support for reducing communication overhead in multiprocessor interconnection networks",
    	year = 1997
    }
    
  97. Jose Duato, Pedro Lopez and S Yalamanchili. Deadlock- and livelock-free routing protocols for wave switching. In Parallel Processing Symposium, 1997. Proceedings., 11th International. April 1997, 570 -577. URL, DOI BibTeX

    @conference{ 580958,
    	author = "Duato, Jose and Lopez, Pedro and S. Yalamanchili",
    	abstract = "Wave switching is a hybrid switching technique for high performance routers. It combines wormhole switching and circuit switching in the same router architecture. Wave switching achieves very high performance by exploiting communication locality. When two nodes are going to communicate frequently, a physical circuit is established between them. By combining circuit switching, pre-established physical circuits and wave pipelining across channels and switches, it is possible to increase network bandwidth considerably, also reducing latency for communications that use pre-established physical circuits. In this paper we propose two protocols for routers implementing wave switching. The first protocol handles the network as a cache of circuits, automatically establishing a circuit when two nodes are going to communicate. Subsequent communications use the previously established circuit. When a new circuit requests channels belonging to another circuit, a replacement algorithm selects the circuit to be torn down. The second protocol relies on the programmer and/or the compiler to decide when a circuit should be established or torn down for a set of messages. Also, we show that the proposed protocols are always able to deliver messages, and are deadlock- and livelock-free",
    	booktitle = "Parallel Processing Symposium, 1997. Proceedings., 11th International",
    	doi = "10.1109/IPPS.1997.580958",
    	keywords = "circuit switching;deadlock-free;high performance routers;livelock-free;protocol;routing protocols;wave switching;wormhole switching;circuit switching;concurrency control;multiprocessor interconnection networks;network routing;protocols;",
    	month = apr,
    	pages = "570--577",
    	title = "{D}eadlock- and livelock-free routing protocols for wave switching",
    	url = "http://dx.doi.org/10.1109/IPPS.1997.580958",
    	year = 1997
    }
    
  98. Jose Duato, Pedro Lopez and S Yalamanchili. Deadlock- and livelock-free routing protocols for wave switching. 1997, 570 - 7. URL BibTeX

    @conference{ 5559828,
    	author = "Duato, Jose and Lopez, Pedro and S. Yalamanchili",
    	abstract = "Wave switching is a hybrid switching technique for high performance routers. It combines wormhole switching and circuit switching in the same router architecture. Wave switching achieves very high performance by exploiting communication locality. When two nodes are going to communicate frequently, a physical circuit is established between them. By combining circuit switching, pre-established physical circuits and wave pipelining across channels and switches, it is possible to increase network bandwidth considerably, also reducing latency for communications that use pre-established physical circuits. In this paper we propose two protocols for routers implementing wave switching. The first protocol handles the network as a cache of circuits, automatically establishing a circuit when two nodes are going to communicate. Subsequent communications use the previously established circuit. When a new circuit requests channels belonging to another circuit, a replacement algorithm selects the circuit to be torn down. The second protocol relies on the programmer and/or the compiler to decide when a circuit should be established or torn down for a set of messages. Also, we show that the proposed protocols are always able to deliver messages, and are deadlock- and livelock-free",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. 11th International Parallel Processing Symposium (Cat. No.97TB100107)",
    	keywords = "circuit switching;concurrency control;multiprocessor interconnection networks;network routing;protocols;",
    	note = "wave switching;routing protocols;high performance routers;wormhole switching;circuit switching;protocol;livelock-free;deadlock-free;",
    	pages = "570--577",
    	title = "{D}eadlock- and livelock-free routing protocols for wave switching",
    	url = "http://dx.doi.org/10.1109/IPPS.1997.580958",
    	year = 1997
    }
    
  99. Federico Silla and Jose Duato. Improving the efficiency of adaptive routing in networks with irregular topology. 1997, 330 - 5. URL BibTeX

    @conference{ 5767661,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations are emerging as a cost-effective alternative to parallel computers. The interconnection between workstations usually relies on switch-based networks with irregular topologies. This irregularity makes routing and deadlock avoidance quite complicated. Current proposals avoid deadlock by removing cyclic dependencies between channels and therefore, many messages are routed along non-minimal paths, increasing latency and wasting resources. We propose a general methodology for the design of adaptive routing algorithms for networks with irregular topology that improves a previously proposed one by reducing the probability of routing over non-minimal paths. The resulting routing algorithms allow messages to follow minimal paths in most cases, reducing message latency and increasing network throughput. As an example of application, we propose an improved adaptive routing algorithm for Autonet",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Fourth International Conference on High-Performance Computing (Cat. No.97TB100185)",
    	keywords = "concurrency control;graph theory;local area networks;message switching;parallel processing;performance evaluation;telecommunication network routing;",
    	note = "adaptive routing;irregular topology networks;workstation networks;cost-effective;parallel computers;switch-based networks;deadlock avoidance;cyclic dependencies;message routing;latency;probability;minimal paths;message latency;network throughput;Autonet;local area networks;",
    	pages = "330--335",
    	title = "{I}mproving the efficiency of adaptive routing in networks with irregular topology",
    	url = "http://dx.doi.org/10.1109/HIPC.1997.634511",
    	year = 1997
    }
    
  100. Federico Silla and Jose Duato. Improving the efficiency of adaptive routing in networks with irregular topology. 1997, 330 - 335. BibTeX

    @conference{ 1998104020145,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations are emerging as a cost-effective alternative to parallel computers. The interconnection between workstations usually relies on switch-based networks with irregular topologies. This irregularity makes routing and deadlock avoidance quite complicated. Current proposals avoid deadlock by removing cyclic dependencies between channels and therefore, many messages are routed along non-minimal paths, increasing latency and wasting resources. In this paper, we propose a general methodology for the design of adaptive routing algorithms for networks with irregular topology that improves over a previously proposed one by reducing the probability of routing over non-minimal paths. The resulting routing algorithms allow messages to follow minimal paths in most cases, reducing message latency and increasing network throughput. As an example of application, we propose an improved adaptive routing algorithm for Autonet.",
    	address = "Bangalore, India",
    	booktitle = "Proceedings of the International Conference on High Performance Computing, HiPC",
    	key = "Computer networks",
    	keywords = "Adaptive algorithms;Communication channels (information theory);Computer system recovery;Computer workstations;Congestion control (communication);Electric network topology;Probability;Response time (computer systems);Switching circuits;",
    	note = "Adaptive routing algorithms;",
    	pages = "330--335",
    	title = "{I}mproving the efficiency of adaptive routing in networks with irregular topology",
    	year = 1997
    }
    
  101. T Olivares, P Cuenca, F J Quiles, A Garrido, J L Sanchez and Jose Duato. Interconnection network behavior on a multicomputer in the parallelization of the MPEG coding algorithm. Worm-hole vs Packet-Switching Routing. 1997, 48 - 53. BibTeX

    @conference{ 1998104020104,
    	author = "T. Olivares and P. Cuenca and Quiles, F. J. and A. Garrido and Sanchez, J. L. and Duato, Jose",
    	abstract = "In this work we propose the implementation of a MPEG encoder developed by the University of California at Berkeley on a multicomputer system. Since this application is in real time, we present a mapping of the video sequence between the EPs of the architecture, where the communication between EPs is minimized. We also propose the necessary load/store process with a simple mechanism input/output, where the global distribution process latency is compensated. Idoneity of the topology of the system is analyzed together with the most adequate commutation technique for the interconnection network. Finally the incidence of the frame format on the system communication performance will be analyzed.",
    	address = "Bangalore, India",
    	booktitle = "Proceedings of the International Conference on High Performance Computing, HiPC",
    	key = "Image coding",
    	keywords = "Algorithms;Computer architecture;Data communication systems;Image compression;Interconnection networks;Packet switching;Parallel processing systems;Real time systems;Standards;",
    	note = "Motion Picture Experts Group (MPEG) standards;Worm hole routing;",
    	pages = "48--53",
    	title = "{I}nterconnection network behavior on a multicomputer in the parallelization of the {MPEG} coding algorithm. {W}orm-hole vs {P}acket-{S}witching {R}outing",
    	year = 1997
    }
    
  102. T Olivares, P Cuenca, F J Quiles, A Garrido, J L Sanchez and Jose Duato. Interconnection network behavior on a multicomputer in the parallelization of the MPEG coding algorithm. Worm-hole vs. packet-switching routing. 1997, 48 - 53. URL BibTeX

    @conference{ 5767620,
    	author = "T. Olivares and P. Cuenca and Quiles, F. J. and A. Garrido and Sanchez, J. L. and Duato, Jose",
    	abstract = "We propose the implementation of a MPEG encoder developed by the University of California at Berkeley on a multicomputer system. Since this application is in real time, we present a mapping of the video sequence between the EPs of the architecture, where the communication between EPs is minimized. We also propose the necessary load/store process with a simple mechanism input/output, where the global distribution process latency is compensated. Idoneity of the topology of the system is analyzed, together with the most adequate commutation technique for the interconnection network. Finally the incidence of the frame format on the system communication performance is analyzed",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Fourth International Conference on High-Performance Computing (Cat. No.97TB100185)",
    	keywords = "data compression;multiprocessing systems;multiprocessor interconnection networks;packet switching;parallel algorithms;real-time systems;video coding;",
    	note = "interconnection network behavior;parallelization;MPEG coding algorithm;packet switching routing;MPEG encoder;multicomputer system;video sequence;load/store process;simple mechanism input/output;global distribution process latency;system topology;commutation technique;frame format;system communication performance;",
    	pages = "48--53",
    	title = "{I}nterconnection network behavior on a multicomputer in the parallelization of the {MPEG} coding algorithm. {W}orm-hole vs. packet-switching routing",
    	url = "http://dx.doi.org/10.1109/HIPC.1997.634469",
    	year = 1997
    }
    
  103. Fabrizio Petrini, Jose Duato, Pedro Lopez and Juan Miguel Martínez. LIFE: A Limited Injection, Fully adaptivE, recovery-based routing algorithm. 1997, 316 - 321. BibTeX

    @conference{ 1998104020143,
    	author = "Fabrizio Petrini and Duato, Jose and Lopez, Pedro and Mart{\'i}nez, Juan Miguel",
    	abstract = "Networks using wormhole switching have traditionally relied upon deadlock avoidance strategies for the design of deadlock-free algorithms. The past few years have seen a rise in popularity of deadlock recovery strategies, that are based on the property that deadlocks are quite rare in practice and happen only at or beyond the network saturation point. In fact, recovery-based routing algorithms have a higher potential performance over the deadlock avoidance-based ones which allow less routing freedom. In this paper we present a recovery-based fully adaptive routing algorithm, LIFE, which is based on an innovative injection policy that reduces the probability of deadlocks to negligible values, both with uniform and non-uniform traffic patterns. The experimental results, conducted on a 8-ary 3-cube with 512 nodes, show that it is possible to implement true fully adaptive routing using only two virtual channels. Also, LIFE outperforms state-of-the-art avoidance- and recovery-based algorithms of the same cost, both in terms of throughput and message latency under uniform traffic and provides stable throughput under non-uniform traffic patterns.",
    	address = "Bangalore, India",
    	booktitle = "Proceedings of the International Conference on High Performance Computing, HiPC",
    	key = "Computer system recovery",
    	keywords = "Adaptive algorithms;Communication channels;Computer networks;Congestion control;Switching circuits;Telecommunication traffic;",
    	note = "Deadlock free algorithms;Non uniform traffic patterns;",
    	pages = "316--321",
    	title = "{LIFE}: {A} {L}imited {I}njection, {F}ully adaptiv{E}, recovery-based routing algorithm",
    	year = 1997
    }
    
  104. Juan Miguel Martínez, Pedro Lopez, Jose Duato and T M Pinkston. Software-based deadlock recovery technique for true fully adaptive routing in wormhole networks. In Parallel Processing, 1997., Proceedings of the 1997 International Conference on. August 1997, 182 -189. URL, DOI BibTeX

    @conference{ 622586,
    	author = "Mart{\'i}nez, Juan Miguel and Lopez, Pedro and Duato, Jose and Pinkston, T. M.",
    	abstract = "In this paper, we take a different approach to handle deadlocks and performance degradation. We propose the use of an injection limitation mechanism that prevents performance degradation near the saturation point and reduces the probability of deadlock to negligible values even when fully adaptive routing is used. We also propose an improved deadlock detection mechanism that only uses local information, detects all the deadlocks, and considerably reduces the probability of false deadlock detection over previous proposals. In the rare case when impending deadlock is detected, our proposed recovery technique absorbs the deadlocked message at the current node and later re-injects it for continued routing towards its destination. Performance evaluation results show that our new approach to deadlock handling is more efficient than previously proposed techniques",
    	booktitle = "Parallel Processing, 1997., Proceedings of the 1997 International Conference on",
    	doi = "10.1109/ICPP.1997.622586",
    	keywords = "deadlock detection mechanism;deadlocked message;fully adaptive routing;injection limitation mechanism;performance degradation;performance evaluation;software-based deadlock recovery technique;true fully adaptive routing;wormhole networks;concurrency control;hypercube networks;network routing;software performance evaluation;system recovery;",
    	month = aug,
    	pages = "182--189",
    	title = "{S}oftware-based deadlock recovery technique for true fully adaptive routing in wormhole networks",
    	url = "http://dx.doi.org/10.1109/ICPP.1997.622586",
    	year = 1997
    }
    
  105. J M Martinez, Pedro Lopez, Jose Duato and T M Pinkston. Software-based deadlock recovery technique for true fully adaptive routing in wormhole networks. 1997, 182 - 9. URL BibTeX

    @conference{ 5698560,
    	author = "Martinez, J. M. and Lopez, Pedro and Duato, Jose and Pinkston, T. M.",
    	abstract = "In this paper, we take a different approach to handle deadlocks and performance degradation. We propose the use of an injection limitation mechanism that prevents performance degradation near the saturation point and reduces the probability of deadlock to negligible values even when fully adaptive routing is used. We also propose an improved deadlock detection mechanism that only uses local information, detects all the deadlocks, and considerably reduces the probability of false deadlock detection over previous proposals. In the rare case when impending deadlock is detected, our proposed recovery technique absorbs the deadlocked message at the current node and later re-injects it for continued routing towards its destination. Performance evaluation results show that our new approach to deadlock handling is more efficient than previously proposed techniques",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the 1997 International Conference on Parallel Processing (Cat. No.97TB100162)",
    	keywords = "concurrency control;hypercube networks;network routing;software performance evaluation;system recovery;",
    	note = "software-based deadlock recovery technique;true fully adaptive routing;wormhole networks;performance degradation;injection limitation mechanism;fully adaptive routing;deadlock detection mechanism;deadlocked message;performance evaluation;",
    	pages = "182--189",
    	title = "{S}oftware-based deadlock recovery technique for true fully adaptive routing in wormhole networks",
    	url = "http://dx.doi.org/10.1109/ICPP.1997.622586",
    	year = 1997
    }
    
  106. Jose Duato. Switching techniques, adaptive routing and deadlock handling in interconnection networks. 1997, 88 -. BibTeX

    @conference{ 1997403781776,
    	author = "Duato, Jose",
    	abstract = "Three key issues in the design of interconnection networks: switching techniques, mechanisms for deadlock handling, and routing algorithms is discussed. These three issues are closely related to each other. Several switching techniques are described, including hybrid techniques, and highlighting the relationship between switching technique and network technology. Mechanisms in deadlock handling in interconnection networks and their application to the design of adaptive routing algorithms is presented. Techniques for deadlock avoidance and recovery, focusing mainly on proposals that allow cyclic dependencies between network resources are also described.",
    	address = "Montreal, Can",
    	booktitle = "International Conference on Massively Parallel Processing Using Optical Interconnections (MPPOI), Proceedings",
    	key = "Interconnection networks",
    	keywords = "Adaptive algorithms;Computer networks;Computer system recovery;",
    	note = "Adaptive routing algorithms;Deadlock handling;",
    	pages = "88+",
    	title = "{S}witching techniques, adaptive routing and deadlock handling in interconnection networks",
    	year = 1997
    }
    
  107. Jose Duato. Theory of fault-tolerant routing in wormhole networks. IEEE Transactions on Parallel and Distributed Systems 8(8):790 - 802, 1997. URL BibTeX

    @article{ 1997463838107,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operation in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyzes the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level. We propose a sufficient condition for channel redundancy, also computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies its value. This theory is developed on top of our necessary and sufficient condition for deadlock-free adaptive routing. The new theory also considers the failure of physical channels when virtual channels are used. Finally, we propose a methodology for the design of fault-tolerant routing algorithms, showing its application to n-dimensional meshes.",
    	address = "Los Alamitos, CA, United States",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Fault tolerant computer systems",
    	keywords = "Algorithms;Communication channels (information theory);Computational complexity;Computer system recovery;Interconnection networks;Redundancy;Reliability;Virtual reality;",
    	note = "Adaptive routing;Channel redundancy;Fault tolerant routing;Wormhole networks;",
    	number = 8,
    	pages = "790--802",
    	title = "{T}heory of fault-tolerant routing in wormhole networks",
    	url = "http://dx.doi.org/10.1109/71.605766",
    	volume = 8,
    	year = 1997
    }
    
  108. Federico Silla and Jose Duato. Tuning the number of virtual channels in networks of workstations. 1997, 72 - 5. BibTeX

    @conference{ 5870025,
    	author = "Silla, Federico and Duato, Jose",
    	abstract = "Networks of workstations (NOWs) are becoming increasingly popular as a cost-effective alternative to parallel computers. Typically, these networks connect processors using switch-based interconnects with irregular topology. We proposed a design methodology as well as fully adaptive routing algorithms for irregular topologies. These algorithms require the use of, at least, two virtual channels. We have also proposed a very efficient flow control mechanism to support virtual channels in the environment of irregular networks with varying wire lengths. We study the effect that additional virtual channels have on the performance of irregular networks built using the routing algorithms and the flow control mechanism. Results reveal that the optimal number of virtual channels per physical channel varies with network size",
    	address = "Raleigh, NC, USA",
    	booktitle = "Proceedings of the ISCA 10th International Conference on Parallel and Distributed Computing Systems",
    	keywords = "local area networks;multiprocessor interconnection networks;performance evaluation;telecommunication channels;telecommunication network routing;",
    	note = "virtual channel tuning;workstation networks;cost effective;parallel computers;processor interconnection networks;switch based interconnects;irregular topology;design methodology;adaptive routing algorithms;flow control;varying wire length;network performance;routing algorithms;network size;wormhole switching;",
    	pages = "72--75",
    	title = "{T}uning the number of virtual channels in networks of workstations",
    	year = 1997
    }
    
  109. Jose Duato, Pedro Lopez, Federico Silla and S Yalamanchili. A high performance router architecture for interconnection networks. 1996, 61 - 8. URL BibTeX

    @conference{ 5376067,
    	author = "Duato, Jose and Lopez, Pedro and Silla, Federico and S. Yalamanchili",
    	abstract = "We propose a new router architecture that supports wormhole switching and circuit switching concurrently. This architecture has been designed to take advantage of temporal communication locality. This can be done by establishing a circuit between nodes that are going to communicate frequently. Messages using those circuits face no contention. By combining circuit switching, pre-established physical circuits and wave pipelining across channels and switches, it is possible to increase network bandwidth considerably, also reducing latency for communications that use pre-established physical circuits. This router architecture also allows to reduce the overhead of the software messaging layer in multicomputers by offering a better hardware support. Preliminary performance evaluation results show a drastic reduction in latency and increment in throughput when messages are long enough, even if circuits are established for a single transmission and locality is not exploited",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the 1996 International Conference on Parallel Processing. Vol.1 Architecture",
    	keywords = "message passing;multiprocessor interconnection networks;parallel architectures;performance evaluation;",
    	note = "high performance router architecture;interconnection networks;wormhole switching;circuit switching;temporal communication locality;router architecture;software messaging layer;performance evaluation;",
    	pages = "61--68",
    	title = "{A} high performance router architecture for interconnection networks",
    	url = "http://dx.doi.org/10.1109/ICPP.1996.537144",
    	volume = 1,
    	year = 1996
    }
    
  110. M P Malumbres, Jose Duato and J Torrellas. An efficient implementation of tree-based multicast routing for distributed shared-memory multiprocessors. 1996, 186 - 9. URL BibTeX

    @conference{ 5465328,
    	author = "Malumbres, M. P. and Duato, Jose and J. Torrellas",
    	abstract = "This paper presents an efficient routing and flow control mechanism to implement multidestination message passing in wormhole networks. It is targeted to situations where the size of message data is very small, like in invalidation and update messages in distributed shared-memory multiprocessors (DSMs) with hardware cache coherence. The mechanism is a variation of tree-based multicast with pruning to avoid deadlocks. The new scheme does not require that the destination addresses in a given multicast message be ordered, thereby avoiding any ordering overhead. It allows messages to use any deadlock-free routing function and only requires one startup for each multicast message. The new scheme has been evaluated on several k-ary n-cube networks under synthetic loads. The results show that the proposed scheme is faster than other multicast mechanisms when the multicast traffic is composed of short messages",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Eighth IEEE Symposium on Parallel and Distributed Processing (Cat. No.96TB100088)",
    	keywords = "communication complexity;distributed memory systems;message passing;shared memory systems;tree data structures;",
    	note = "tree-based;multicast routing;shared-memory multiprocessors;distributed shared-memory;wormhole networks;multidestination message passing;synthetic loads;multicast mechanisms;",
    	pages = "186--189",
    	title = "{A}n efficient implementation of tree-based multicast routing for distributed shared-memory multiprocessors",
    	url = "http://dx.doi.org/10.1109/SPDP.1996.570332",
    	year = 1996
    }
    
  111. M P Malumbres, Jose Duato and Josep Torrellas. Efficient implementation of tree-based multicast routing for distributed shared-memory multiprocessors. 1996, 186 - 189. BibTeX

    @conference{ 1997093486439,
    	author = "Malumbres, M. P. and Duato, Jose and Josep Torrellas",
    	abstract = "This paper presents an efficient routing and flow control mechanism to implement multidestination message passing in wormhole networks. It is targeted to situations where the size of message data is very small, like in invalidation and update messages in distributed shared-memory multiprocessors (DSMs) with hardware cache coherence. The mechanism is a variation of tree-based multicast with pruning to avoid deadlocks. The new scheme does not require that the destination addresses in a given multicast message be ordered, thereby avoiding any ordering overhead. It allows messages to use any deadlock-free routing function and only requires one startup for each multicast message. The new scheme has been evaluated on several k-ary n-cube networks under synthetic loads. The results show that the proposed scheme is faster than other multicast mechanisms when the multicast traffic is composed of short messages.",
    	address = "New Orleans, LA, USA",
    	booktitle = "IEEE Symposium on Parallel and Distributed Processing - Proceedings",
    	issn = "1063-6374",
    	key = "Distributed computer systems",
    	keywords = "Computer system recovery;Data communication systems;Distributed database systems;Synchronization;Telecommunication traffic;Trees;",
    	note = "Distributed shared memory multiprocessors (DSM);Tree based multicast routing;",
    	pages = "186--189",
    	title = "{E}fficient implementation of tree-based multicast routing for distributed shared-memory multiprocessors",
    	year = 1996
    }
    
  112. Anjan K V., Timothy Mark Pinkston and Jose Duato. Generalized theory for deadlock-free adaptive wormhole routing and its application to Disha concurrent. 1996, 815 - 821. BibTeX

    @conference{ 1996363246596,
    	author = "Anjan K. V. and Timothy Mark Pinkston and Duato, Jose",
    	abstract = "This paper generalizes a theory for deadlock-free adaptive wormhole routing by considering a mixed set of resources: edge and central buffers. This generalized theory is then applied to a concurrent version of Disha deadlock-recovery which relaxes the sequential recovery requirement for simultaneous recovery from deadlocks. The proposed extension to Disha does not necessitate any additional resource cost; rather, it serves to eliminate the requirement of mutual exclusive access to the deadlock-free lane implemented by a Token. With this extension, Disha Concurrent remains applicable to any topology with a Hamiltonian path including k-ary n-cube networks and is also applicable to tree-based networks.",
    	address = "Honolulu, HI, USA",
    	booktitle = "IEEE Symposium on Parallel and Distributed Processing - Proceedings",
    	issn = "1063-6374",
    	key = "Computation theory",
    	keywords = "Adaptive algorithms;Buffer storage;Communication channels (information theory);Computer system recovery;Electric network topology;Interconnection networks;Packet switching;",
    	note = "Deadlock free adaptive wormhole routing;Deadlocks;Disha concurrent;Generalized theory;",
    	pages = "815--821",
    	title = "{G}eneralized theory for deadlock-free adaptive wormhole routing and its application to {D}isha concurrent",
    	year = 1996
    }
    
  113. A K Venkatramani, T M Pinkston and Jose Duato. Generalized theory for deadlock-free adaptive wormhole routing and its application to Disha Concurrent. 1996, 815 - 21. URL BibTeX

    @conference{ 5309807,
    	author = "Venkatramani, A. K. and Pinkston, T. M. and Duato, Jose",
    	abstract = "This paper generalizes a theory for deadlock-free adaptive wormhole routing by considering a mixed set of resources: edge and central buffers. This generalized theory is then applied to a concurrent version of Disha deadlock-recovery which relaxes the sequential recovery requirement for simultaneous recovery from deadlocks. The proposed extension to Disha does not necessitate any additional resource cost; rather it serves to eliminate the requirement of mutual exclusive access to the deadlock-free lane implemented by a Token. With this extension, Disha Concurrent remains applicable to any topology with a Hamiltonian path including k-ary n-cube networks and is also applicable to tree-based networks",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of IPPS '96. The 10th International Parallel Processing Symposium (Cat. No.96TB100038)",
    	keywords = "concurrency control;fault tolerant computing;multiprocessor interconnection networks;network routing;parallel architectures;performance evaluation;system recovery;",
    	note = "deadlock-free adaptive wormhole routing;Disha Concurrent;edge;central buffers;deadlock recovery;sequential recovery;resource cost;mutual exclusive access;deadlock-free lane;Token;Hamiltonian path;k-ary n-cube networks;tree-based networks;multiprocessor interconnection networks;",
    	pages = "815--821",
    	title = "{G}eneralized theory for deadlock-free adaptive wormhole routing and its application to {D}isha {C}oncurrent",
    	url = "http://dx.doi.org/10.1109/IPPS.1996.508153",
    	year = 1996
    }
    
  114. Rosa Alcover, Pedro Lopez, Jose Duato and L Zunica. Interconnection network design: a statistical analysis of interactions between factors. In Parallel and Distributed Processing, 1996. PDP '96. Proceedings of the Fourth Euromicro Workshop on. January 1996, 211 -218. URL, DOI BibTeX

    @conference{ 500589,
    	author = "Alcover, Rosa and Lopez, Pedro and Duato, Jose and Zunica, L.",
    	abstract = "Interconnection network performance depends on several parameters, including network design parameters, network size, message traffic and message length. Simulation is the methodology usually followed in evaluation studies, because the model can more faithfully represent hardware implementation, taking into account more details. Nevertheless, the number of parameter combinations is often high, and simulations also take long to complete. Therefore, evaluation studies must choose a subset of the parameters and restrict the variability of each of them. In a previous paper (IEEE Computer Soc. TCCA Newsletter, pp. 32-37, Aug. 1995), we have proposed a methodology for evaluating interconnection networks. It is based on experimental design used in statistical studies. Using this methodology, we can study network behavior considering many parameters, running only a subset of the simulations required to study all the combination. In addition, the methodology permits us to quantify the effect of interactions among the parameters. In this paper, we make use of the second advantage of this methodology, analysing the effect of node design parameters and their interactions for an 8-ary 3-cube with adaptive wormhole routing",
    	booktitle = "Parallel and Distributed Processing, 1996. PDP '96. Proceedings of the Fourth Euromicro Workshop on",
    	doi = "10.1109/EMPDP.1996.500589",
    	keywords = "8-ary 3-cube;adaptive wormhole routing;evaluation studies;interconnection network design;interconnection network performance;message length;message traffic;network behavior;network design parameters;network size;node design parameters;parameter combinatio",
    	month = jan,
    	pages = "211--218",
    	title = "{I}nterconnection network design: a statistical analysis of interactions between factors",
    	url = "http://dx.doi.org/10.1109/EMPDP.1996.500589",
    	year = 1996
    }
    
  115. Rosa Alcover, Pedro Lopez, Jose Duato and L Zunica. Interconnection network design: a statistical analysis of interactions between factors. 1996, 211 - 18. URL BibTeX

    @conference{ 5242395,
    	author = "Alcover, Rosa and Lopez, Pedro and Duato, Jose and Zunica, L.",
    	abstract = "Interconnection network performance depends on several parameters, including network design parameters, network size, message traffic and message length. Simulation is the methodology usually followed in evaluation studies, because the model can more faithfully represent hardware implementation, taking into account more details. Nevertheless, the number of parameter combinations is often high, and simulations also take long to complete. Therefore, evaluation studies must choose a subset of the parameters and restrict the variability of each of them. In a previous paper (IEEE Computer Soc. TCCA Newsletter, pp. 32-37, Aug. 1995), we have proposed a methodology for evaluating interconnection networks. It is based on experimental design used in statistical studies. Using this methodology, we can study network behavior considering many parameters, running only a subset of the simulations required to study all the combination. In addition, the methodology permits us to quantify the effect of interactions among the parameters. In this paper, we make use of the second advantage of this methodology, analysing the effect of node design parameters and their interactions for an 8-ary 3-cube with adaptive wormhole routing",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Fourth Euromicro Workshop on Parallel and Distributed Processing - PDP '96",
    	doi = "10.1109/EMPDP.1996.500589",
    	keywords = "design of experiments;multiprocessor interconnection networks;network routing;network synthesis;network topology;performance evaluation;statistical analysis;",
    	note = "interconnection network design;statistical analysis;interconnection network performance;network design parameters;network size;message traffic;message length;simulations;parameter combinations;evaluation studies;parameter variability;network behavior;parameter interactions;node design parameters;8-ary 3-cube;adaptive wormhole routing;",
    	pages = "211--218",
    	title = "{I}nterconnection network design: a statistical analysis of interactions between factors",
    	url = "http://dx.doi.org/10.1109/EMPDP.1996.500589",
    	year = 1996
    }
    
  116. Maria E Gomez and H Goldberg. Lepton flavor violation in SUSY SO(10) with predictive Yukawa texture. Physical Review D Particles, Fields, Gravitation and Cosmology 53(9):5244 - 5244, 1996. URL BibTeX

    @article{ 1996110389429,
    	author = "Gomez, Maria E. and Goldberg, H.",
    	doi = "10.1103/PhysRevD.53.5244",
    	issn = "0556-2821",
    	journal = "Physical Review D Particles, Fields, Gravitation and Cosmology",
    	number = 9,
    	pages = "5244",
    	title = "{L}epton flavor violation in {SUSY} {SO}(10) with predictive {Yukawa} texture",
    	url = "http://dx.doi.org/10.1103/PhysRevD.53.5244",
    	volume = 53,
    	year = 1996
    }
    
  117. Jose Duato. Necessary and sufficient condition for deadlock-free routing in cut-through and store-and-forward networks. IEEE Transactions on Parallel and Distributed Systems 7(8):841 - 854, 1996. URL BibTeX

    @article{ 1996463341409,
    	author = "Duato, Jose",
    	abstract = "This paper develops the theoretical background for the design of deadlock-free adaptive routing algorithms for virtual cut-through and store-and-forward switching. This theory is valid for networks using either central buffers or edge buffers. Some basic definitions and three theorems are proposed, developing conditions to verify that an adaptive algorithm is deadlock-free, even when there are cyclic dependencies between routing resources. Moreover, we propose a necessary and sufficient condition for deadlock-free routing. Also, a design methodology is proposed. It supplies fully adaptive, minimal and non-minimal routing algorithms, guaranteeing that they are deadlock-free. The theory proposed in this paper extends the necessary and sufficient condition for wormhole switching previously proposed by us. The resulting routing algorithms are more flexible than the ones for wormhole switching. Also, the design methodology is much easier to apply because it automatically supplies deadlock-free routing algorithms.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.532115",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Computer networks",
    	keywords = "Adaptive algorithms;Buffer storage;Communication channels;Interconnection networks;Network protocols;Storage allocation;Switching theory;Systems analysis;Telecommunication traffic;",
    	note = "Adaptive routing;Deadlock free routing;Store and forward networks;Virtual cut through;",
    	number = 8,
    	pages = "841--854",
    	title = "{N}ecessary and sufficient condition for deadlock-free routing in cut-through and store-and-forward networks",
    	url = "http://dx.doi.org/10.1109/71.532115",
    	volume = 7,
    	year = 1996
    }
    
  118. Jose Duato and M P Malumbres. Optimal topology for distributed shared memory multiprocessors: Hypercubes again? Lecture Notes in Computer Science 1123:205 - 205, 1996. BibTeX

    @article{ 1996123450253,
    	author = "Duato, Jose and Malumbres, M. P.",
    	address = "Lyon, France",
    	issn = "0302-9743",
    	journal = "Lecture Notes in Computer Science",
    	pages = "205--212",
    	title = "{O}ptimal topology for distributed shared memory multiprocessors: {H}ypercubes again?",
    	volume = 1123,
    	year = 1996
    }
    
  119. Jose Duato and M P Malumbres. Optimal topology for distributed shared-memory multiprocessors: hypercubes again? 1996, 205 - 12. BibTeX

    @conference{ 5464760,
    	author = "Duato, Jose and Malumbres, M. P.",
    	abstract = "Many distributed shared memory multiprocessors (DSM) use a direct interconnection network to implement a cache coherence protocol. An interesting characteristic of the message traffic produced by coherence protocols is that all the messages are very short. Most current multicomputers use low dimensional meshes or tori because these topologies usually achieve a higher performance. However, when messages are very short, latency is mainly dominated by the distance traveled in the network. As a consequence, higher dimensional topologies may achieve a lower latency than low dimensional topologies. We compare the 2D mesh and the hypercube topologies assuming a very detailed router model. Network load has been modeled taking into account the traffic produced by cache coherence protocols. Performance results show that average latency for hypercubes is slightly lower than for meshes. Moreover, hypercubes achieve a much higher throughput than meshes, making them suitable for DSMs",
    	address = "Berlin, Germany",
    	booktitle = "Euro-Par '96 Parallel Processing. Second International Euro-Par Conference. Proceedings",
    	keywords = "distributed memory systems;hypercube networks;memory protocols;message passing;performance evaluation;shared memory systems;",
    	note = "optimal topology;distributed shared memory multiprocessors;DSM;direct interconnection network;message traffic;multicomputers;higher dimensional topologies;low dimensional topologies;2D mesh;hypercube topologies;router model;network load;cache coherence protocols;average latency;",
    	pages = "205--212",
    	title = "{O}ptimal topology for distributed shared-memory multiprocessors: hypercubes again?",
    	volume = 1,
    	year = 1996
    }
    
  120. Binh Vien Dao, Jose Duato and Sudhakar Yalamanchili. Configurable flow control mechanisms for fault-tolerant routing. 1995, 220 - 229. BibTeX

    @conference{ 1995482886519,
    	author = "Binh Vien Dao and Duato, Jose and Sudhakar Yalamanchili",
    	abstract = "Fault-tolerant routing protocols in modern interconnection networks rely heavily on the network flow control mechanisms used. Optimistic flow control mechanisms such as wormhole routing (WR) realize very good performance, but are prone to deadlock in the presence of faults. Conservative flow control mechanisms such as pipelined circuit switching (PCS) insures existence of a path to the destination prior to message transmission, but incurs increased overhead. Existing fault-tolerant routing protocols are designed with one or the other, and must accommodate their associated constraints. This paper proposes the use of configurable flow control mechanisms. Routing protocols can then be designed such that in the vicinity of faults, protocols use a more conservative flow control mechanism, while the majority of messages that traverse fault-free portions of the network utilize a WR like flow control to maximize performance. Such protocols are referred to as two-phase protocols, where routing decisions are provided some control over the operation of the virtual channels. This ability provides new avenues for optimizing message passing performance in the presence of faults. A fully adaptive two-phase protocol is proposed and compared via simulation to those based on WR and PCS. The architecture of a network router supporting configurable flow control is described, and the paper concludes with avenues for future research.",
    	address = "Santa Margherita Ligure, Italy",
    	booktitle = "ACM SIGARCH (Association for Computing Machinery Special Interest Group on Computer Architecture) - Conference Proceedings",
    	key = "Fault tolerant computer systems",
    	keywords = "Algorithms;Computer architecture;Computer simulation;Constraint theory;Interconnection networks;Multiprocessing systems;Network protocols;Pipeline processing systems;Switching;",
    	note = "Fault tolerant routing;Pipeline circuit switching;Scouting routing;Virtual channels;Wormhole routing;",
    	pages = "220--229",
    	title = "{C}onfigurable flow control mechanisms for fault-tolerant routing",
    	year = 1995
    }
    
  121. Binh Vien Dao, Jose Duato and Sudhakar Yalamanchili. Configurable flow control mechanisms for fault-tolerant routing. 1995, 220 - 229. BibTeX

    @conference{ 1995492892924,
    	author = "Binh Vien Dao and Duato, Jose and Sudhakar Yalamanchili",
    	abstract = "Fault-tolerant routing protocols in modern interconnection networks rely heavily on the network flow control mechanisms used. Optimistic flow control mechanisms such as wormhole routing (WR) realize very good performance, but are prone to deadlock in the presence of faults. Conservative flow control mechanisms such as pipelined circuit switching (PCS) insures existence of a path to the destination prior to message transmission, but incurs increased overhead. Existing fault-tolerant routing protocols are designed with one or the other, and must accommodate their associated constraints. This paper proposes the use of configurable flow control mechanisms. Routing protocols can then be designed such that in the vicinity of faults, protocols use a more conservative flow control mechanism, while the majority of messages that traverse fault-free portions of the network utilize a WR like flow control to maximize performance. Such protocols are referred to as two-phase protocols, where routing decisions are provided some control over the operation of the virtual channels. This ability provides new avenues for optimizing message passing performance in the presence of faults. A fully adaptive two-phase protocol is proposed and compared via simulation to those based on WR and PCS. The architecture of a network router supporting configurable flow control is described, and the paper concludes with avenues for future research.",
    	address = "Santa Margherita Ligure, Italy",
    	booktitle = "Conference Proceedings - Annual International Symposium on Computer Architecture, ISCA",
    	issn = "0884-7495",
    	key = "Network protocols",
    	keywords = "Congestion control;Critical path analysis;Data communication systems;Interconnection networks;Pipeline processing systems;Telecommunication traffic;",
    	note = "Fault tolerant routing;Flow control mechanisms;Pipelined circuit switching;Wormhole routing;",
    	pages = "220--229",
    	title = "{C}onfigurable flow control mechanisms for fault-tolerant routing",
    	year = 1995
    }
    
  122. Binh Vien Dao, Jose Duato and S Yalamanchili. Configurable flow control mechanisms for fault-tolerant routing. 1995, 220 - 9. URL BibTeX

    @conference{ 5086788,
    	author = "Binh Vien Dao and Duato, Jose and Yalamanchili, S.",
    	abstract = "Fault-tolerant routing protocols in modern interconnection networks rely heavily on the network flow control mechanisms used. Optimistic flow control mechanisms such as wormhole routing (WR) realize very good performance, but are prone to deadlock in the presence of faults. Conservative flow control mechanisms such as pipelined circuit switching (PCS) insures existence of a path to the destination prior to message transmission, but incurs increased overhead. Existing fault-tolerant routing protocols are designed with one or the other, and must accommodate their associated constraints. This paper proposes the use of configurable flow control mechanisms. Routing protocols can then be designed such that in the vicinity of faults, protocols use a more conservative flow control mechanism, while the majority of messages that traverse fault-free portions of the network utilize a WR like flow control to maximize performance. Such protocols are referred to as two-phase protocols where routing decisions are provided some control over the operation of the virtual channels. This ability provides new avenues for optimizing message passing performance in the presence of faults. A fully adaptive two-phase protocol is proposed and compared via simulation to those based on WR and PCS. The architecture of a network router supporting configurable flow control is described, and the paper concludes with avenues for future research",
    	address = "New York, NY, USA",
    	booktitle = "Proceedings 22nd Annual International Symposium on Computer Architecture (IEEE Cat. No.95CB35801)",
    	doi = "10.1109/ISCA.1995.524563",
    	keywords = "fault tolerant computing;message passing;multiprocessor interconnection networks;protocols;",
    	note = "configurable flow control mechanisms;fault-tolerant routing;protocols;interconnection networks;wormhole routing;pipelined circuit switching;message transmission;fault-free portions;message passing performance;",
    	pages = "220--229",
    	title = "{C}onfigurable flow control mechanisms for fault-tolerant routing",
    	url = "http://dx.doi.org/10.1109/ISCA.1995.524563",
    	year = 1995
    }
    
  123. Pedro Lopez and Jose Duato. Deadlock-free fully-adaptive minimal routing algorithms: limitations and solutions. Computers and Artificial Intelligence 14(2):105 - 25, 1995. BibTeX

    @article{ 5024414,
    	author = "Lopez, Pedro and Duato, Jose",
    	abstract = "In previous papers, a theory for the design of deadlock-free adaptive routing algorithms as well as a design methodology have been proposed. In this paper, an adaptive routing algorithm, obtained from the application of this theory to the 3D-torus, is evaluated under different load conditions and compared with other algorithms. The results show that this algorithm is very fast, also increasing the network throughput considerably. Nevertheless, this adaptive algorithm has cycles in its channel dependency graph. Consequently, when the network is heavily loaded messages may temporarily block cyclically, drastically reducing the performance of the algorithm. Two mechanisms are proposed to avoid this problem",
    	address = "Slovakia",
    	issn = "0232-0274",
    	journal = "Computers and Artificial Intelligence",
    	keywords = "concurrency control;distributed algorithms;distributed memory systems;distributed processing;message passing;processor scheduling;",
    	note = "deadlock-free fully-adaptive minimal routing algorithm;distributed memory computer;interconnection network;multiprocessor design;theory;3D-torus;three dimensional torus;network throughput;channel dependency graph;message passing;temporary block;",
    	number = 2,
    	pages = "105--125",
    	title = "{D}eadlock-free fully-adaptive minimal routing algorithms: limitations and solutions",
    	volume = 14,
    	year = 1995
    }
    
  124. Jose Duato and Pedro Lopez. Highly adaptive wormhole routing algorithms for n-dimensional torus. 1995, 87 - 104. BibTeX

    @conference{ 5513276,
    	author = "Duato, Jose and Lopez, Pedro",
    	abstract = "Deadlock avoidance is a key issue in wormhole networks. A first approach consists of removing the cyclic dependencies between channels. Many deterministic and adaptive routing algorithms have been proposed based on that approach. The absence of cyclic dependencies is a necessary and sufficient condition for deadlock-free deterministic routing. However, it can be relaxed for adaptive routing. A more powerful approach was proposed by us. It only requires the absence of cyclic dependencies on a connected channel subset. The remaining channels can be used in almost any way. In this paper, we show that there exists a more relaxed condition for deadlock-free adaptive routing. This condition is the key for the design of more powerful adaptive routing algorithms. We apply this condition to the design of adaptive routing algorithms for n-dimensional torus. In particular, we propose a partially adaptive routing algorithm which doubles the throughput achieved by the deterministic algorithm without increasing the hardware complexity significantly",
    	address = "New York, NY, USA",
    	booktitle = "Interconnection Networks and Mapping and Scheduling Parallel Computations. DIMACS Workshop",
    	keywords = "deterministic algorithms;multiprocessor interconnection networks;telecommunication network routing;",
    	note = "wormhole networks;n-dimensional torus;wormhole routing;deadlock avoidance;cyclic dependencies;deterministic routing;deterministic algorithm;",
    	pages = "87--104",
    	title = "{H}ighly adaptive wormhole routing algorithms for n-dimensional torus",
    	year = 1995
    }
  125. Jose Duato. Necessary and sufficient condition for deadlock-free adaptive routing in wormhole networks. IEEE Transactions on Parallel and Distributed Systems 6(10):1055 - 1067, 1995. URL BibTeX

    @article{ 1996032932822,
    	author = "Duato, Jose",
    	abstract = "Deadlock avoidance is a key issue in wormhole networks. A first approach [8] consists of removing the cyclic dependencies between channels. Many deterministic and adaptive routing algorithms have been proposed based on that approach. Although the absence of cyclic dependencies is a necessary and sufficient condition for deadlock-free deterministic routing, it is only a sufficient condition for deadlock-free adaptive routing. A more powerful approach [11] only requires the absence of cyclic dependencies on a connected channel subset. The remaining channels can be used in almost any way. In this paper, we show that the previously mentioned approach is also a sufficient condition. Moreover, we propose a necessary and sufficient condition for deadlock-free adaptive routing. This condition is the key for the design of fully adaptive routing algorithms with minimum restrictions. An example shows the application of the new theory.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.473515",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Data communication systems",
    	keywords = "Adaptive algorithms;Bandwidth;Communication channels;Computer networks;Computer system recovery;Multiprocessing systems;Theorem proving;",
    	note = "Adaptive routing;Deadlock avoidance;Routing algorithms;Virtual channels;Wormhole networks;",
    	number = 10,
    	pages = "1055--1067",
    	title = "{N}ecessary and sufficient condition for deadlock-free adaptive routing in wormhole networks",
    	url = "http://dx.doi.org/10.1109/71.473515",
    	volume = 6,
    	year = 1995
    }
    
  126. Jose Duato. Theory of deadlock-free adaptive multicast routing in wormhole networks. IEEE Transactions on Parallel and Distributed Systems 6(9):976 - 987, 1995. URL BibTeX

    @article{ 1995512906712,
    	author = "Duato, Jose",
    	abstract = "A theory for the design of deadlock-free adaptive routing algorithms for wormhole networks was proposed in [2], [16]. This theory supplies the sufficient conditions for an adaptive routing algorithm to be deadlock-free, even when there are cyclic dependencies between channels. Also, two design methodologies were proposed. Multicast communication refers to the delivery of the same message from one source node to an arbitrary number of destination nodes. A tree-like routing scheme is not suitable for hardware-supported multicast in wormhole networks because it produces many headers for each message, drastically increasing the probability of a message being blocked. A path-based multicast routing model was proposed in [25] for multicomputers with 2D-mesh and hypercube topologies. In this model, messages are not replicated at intermediate nodes. This paper develops the theoretical background for the design of deadlock-free adaptive multicast routing algorithms. This theory is valid for wormhole networks using the path-based routing model. It is also valid when messages with a single destination and multiple destinations are mixed together. The new channel dependencies produced by messages with several destinations are studied. Also, two theorems are proposed, developing conditions to verify that an adaptive multicast routing algorithm is deadlock-free, even when there are cyclic dependencies between channels. As an example, the multicast routing algorithms presented in [25] are extended, so that they can take advantage of the alternative paths offered by the network.",
    	address = "Los Alamitos, CA, United States",
    	doi = "10.1109/71.466634",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Interconnection networks",
    	keywords = "Algorithms;Bandwidth;Communication channels (information theory);Congestion control (communication);Data communication systems;Graph theory;Multiprocessing systems;",
    	note = "Adaptive routing;Deadlock avoidance;Multicast routing;Path based multicast;Virtual channels;Wormhole routing;",
    	number = 9,
    	pages = "976--987",
    	title = "{T}heory of deadlock-free adaptive multicast routing in wormhole networks",
    	url = "http://dx.doi.org/10.1109/71.466634",
    	volume = 6,
    	year = 1995
    }
    
  127. Jose Duato. A necessary and sufficient condition for deadlock-free adaptive routing in wormhole networks. 1994, 142 - 9. BibTeX

    @conference{ 5247791,
    	author = "Duato, Jose",
    	abstract = "Deadlock avoidance is a key issue in wormhole networks. A first approach (Dally and Seitz, 1987) consists of removing the cyclic dependencies between channels. Although this is a necessary and sufficient condition for deadlock-free deterministic routing, it is only a sufficient condition for deadlock-free adaptive routing. A more powerful approach (Duato, 1991) only requires the absence of cyclic dependencies on a connected channel subset. The remaining channels can be used in almost any way. In this paper, we propose a necessary and sufficient condition for deadlock-free adaptive routing. This condition is the key for the design of maximally adaptive routing algorithms with minimum restrictions. Some examples are given, showing the application of the new theory. In particular, we propose a partially adaptive routing algorithm for k-ary n-cubes which doubles the throughput without increasing the hardware complexity significantly",
    	address = "Boca Raton, FL, USA",
    	booktitle = "Proceedings of the 1994 International Conference on Parallel Processing",
    	keywords = "computational complexity;fault tolerant computing;multiprocessor interconnection networks;telecommunication network routing;",
    	note = "deadlock-free;adaptive routing;wormhole networks;deadlock avoidance;maximally adaptive routing algorithms;minimum restrictions;k-ary n-cubes;hardware complexity;partially adaptive routing;",
    	pages = "142--149",
    	title = "{A} necessary and sufficient condition for deadlock-free adaptive routing in wormhole networks",
    	volume = 1,
    	year = 1994
    }
    
  128. Jose Duato. A theory of fault-tolerant routing in wormhole networks. 1994, 600 - 7. URL BibTeX

    @conference{ 4864748,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyzes the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level. We propose a sufficient condition for channel redundancy, also computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies its value. This theory is developed on top of our necessary and sufficient condition for deadlock-free adaptive routing. Finally, a fault-tolerant routing algorithm for n-dimensional meshes is proposed",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 1994 International Conference on Parallel and Distributed Systems (Cat. No.94TH06817)",
    	doi = "10.1109/ICPADS.1994.590404",
    	keywords = "concurrency control;fault tolerant computing;message passing;multiprocessor interconnection networks;network routing;parallel algorithms;reliability;",
    	note = "fault-tolerant routing;wormhole networks;fault-tolerant systems;continuous operations;multicomputers;interconnection network;message-passing;interconnection network reliability;redundancy;connectivity;deadlock;channel level;channel redundancy;deadlock-free adaptive routing;fault-tolerant routing algorithm;n-dimensional meshes;",
    	pages = "600--607",
    	title = "{A} theory of fault-tolerant routing in wormhole networks",
    	url = "http://dx.doi.org/10.1109/ICPADS.1994.590404",
    	year = 1994
    }
    
  129. Jose Duato. A theory to increase the effective redundancy in wormhole networks. Parallel Processing Letters 4(1-2):125 - 38, 1994. BibTeX

    @article{ 4749319,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyses the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level, giving a sufficient condition for a channel to be redundant and computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies a lower bound for it. Finally, a fault-tolerant routing algorithm based on the former theory is proposed",
    	address = "Singapore",
    	issn = "0129-6264",
    	journal = "Parallel Processing Letters",
    	keywords = "concurrency control;fault tolerant computing;multiprocessor interconnection networks;redundancy;",
    	note = "effective redundancy;wormhole networks;fault-tolerant systems;continuous operations;message-passing mechanism;interconnection network;deadlock freedom;lower bound;fault-tolerant routing algorithm;",
    	number = "1-2",
    	pages = "125--138",
    	title = "{A} theory to increase the effective redundancy in wormhole networks",
    	volume = 4,
    	year = 1994
    }
    
  130. Ziqiang Liu and Jose Duato. Adaptive unicast and multicast in 3D mesh networks. 1994, 173 - 182. BibTeX

    @conference{ 1994101398696,
    	author = "Ziqiang Liu and Duato, Jose",
    	abstract = "In this paper, we present an adaptive unicast and multicast routing algorithm for 3D mesh networks with wormhole routing and virtual channel flow control, which is called adaptive-cast. The unique feature of the adaptive-cast is that it is valid when messages with a single destination (unicast) and with multiple destinations (multicast) are mixed together, which drastically simplifies the implementation of the router. Also, only two virtual channels per physical channel are needed to support the adaptive-cast. Our simulation experiment result in 10 × 10 × 10 3D mesh have confirmed that the adaptive-cast achieves better performance than the corresponding static routing algorithm under both uniform and nonuniform traffic patterns.",
    	address = "Wailea, HI, USA",
    	booktitle = "Proceedings of the Hawaii International Conference on System Sciences",
    	issn = "1060-3425",
    	key = "Algorithms",
    	keywords = "Communication channels;Computer hardware;Computer networks;Computer simulation;Critical path analysis;Data communication systems;Network protocols;Packet switching;Telecommunication traffic;Three dimensional;Virtual storage;",
    	note = "Adaptive multicast;Adaptive unicast;Multicast communication;Router;Three dimensional mesh networks;Virtual channel;Virtual channel flow control;Wormhole routing;",
    	pages = "173--182",
    	title = "{A}daptive unicast and multicast in {3D} mesh networks",
    	volume = 1,
    	year = 1994
    }
    
  131. Ziqiang Liu and Jose Duato. Adaptive unicast and multicast in 3D mesh networks. 1994, 173 - 82. URL BibTeX

    @inproceedings{ 4682102,
    	author = "Liu, Ziqiang and Duato, Jose",
    	abstract = "Presents an adaptive unicast and multicast routing algorithm for 3D mesh networks with wormhole routing and virtual channel flow control, which is called adaptive-cast. The unique feature of the adaptive-cast is that it is valid when messages with a single destination (unicast) and with multiple destinations (multicast) are mixed together, which drastically simplifies the implementation of the router. Also, only two virtual channels per physical channel are needed to support the adaptive-cast. The authors' simulation experiment results in $10 \times 10 \times 10$ 3D mesh have confirmed that the adaptive-cast achieves better performance than the corresponding static routing algorithm under both uniform and nonuniform traffic patterns",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Twenty-Seventh Hawaii International Conference on System Sciences Vol. I: Architecture (Cat. No.94TH0607-2)",
    	keywords = "multiprocessor interconnection networks;network routing;performance evaluation;",
    	note = "3D mesh networks;multicast routing algorithm;unicast routing algorithm;adaptive unicast;adaptive-cast;wormhole routing;virtual channel flow control;performance;",
    	pages = "173--182",
    	title = "Adaptive unicast and multicast in {3D} mesh networks",
    	doi = "10.1109/HICSS.1994.323174",
    	url = "http://dx.doi.org/10.1109/HICSS.1994.323174",
    	year = 1994
    }
    
  132. Jose Duato. Improving the efficiency of virtual channels with time-dependent selection functions. Future Generation Computer Systems 10(1):45 - 58, 1994. URL BibTeX

    @article{ 1994081333099,
    	author = "Duato, Jose",
    	abstract = "In previous papers, a new theory for the design of deadlock-free adaptive routing algorithms for wormhole and store-and-forward networks as well as two design methodologies have been proposed. Also, a new adaptive routing algorithm, obtained from the application of the former theory to the binary n-cube, has been evaluated using both, a uniform and an exponential distribution for message destination. The results are good, especially for large networks and a uniform distribution for message destination. When locality is exploited, the results are comparatively worse, mainly due to the reduction in channel bandwidth produced by channel multiplexing. In this paper, we analyze the advantages and disadvantages produced by the use of virtual channels, proposing a new approach to maximize their efficiency. This approach uses time-dependent selection functions, associating a threshold to some virtual channels. Those channels cannot be selected by a message unless it is waiting for longer than the corresponding threshold. The evaluation of the new selection function for the binary n-cube shows an important improvement, especially when locality is exploited.",
    	address = "Amsterdam, Netherlands",
    	issn = "0167-739X",
    	journal = "Future Generation Computer Systems",
    	key = "Virtual storage",
    	keywords = "Adaptive control systems;Algorithms;Communication channels (information theory);Computer networks;Computer system recovery;Critical path analysis;Data communication systems;Data processing;Large scale systems;Multiprocessing systems;Packet switching;",
    	note = "Adaptive routing;Time dependent selection functions;Virtual channels;Wormhole routing;",
    	number = 1,
    	pages = "45--58",
    	title = "Improving the efficiency of virtual channels with time-dependent selection functions",
    	doi = "10.1016/0167-739X(94)90050-7",
    	url = "http://dx.doi.org/10.1016/0167-739X(94)90050-7",
    	volume = 10,
    	year = 1994
    }
    
  133. Jose Duato. Improving the efficiency of virtual channels with time-dependent selection functions. Computers and Artificial Intelligence 13(1):25 - 44, 1994. BibTeX

    @article{ 4717789,
    	author = "Duato, Jose",
    	abstract = "In previous papers, a new theory for the design of deadlock-free adaptive routing algorithms for wormhole and store-and-forward networks as well as two design methodologies have been proposed. A new adaptive routing algorithm, obtained from the application of the former theory to the binary n-cube, has been also evaluated using both, a uniform and an exponential distribution for message destination. The results are good, especially for large networks and a uniform distribution for message destination. When locality is exploited, the results are comparatively worse, mainly due to the reduction in channel bandwidth produced by channel multiplexing. We analyse the advantages and disadvantages produced by the use of virtual channels proposing a new approach to maximize their efficiency. This approach uses time-dependent selection functions associating a threshold to some virtual channels. Those channels cannot be selected by a message unless it is waiting for longer than the corresponding threshold. The evaluation of the new selection function for the binary n-cube shows an important improvement, especially when locality is exploited",
    	address = "Slovakia",
    	issn = "0232-0274",
    	journal = "Computers and Artificial Intelligence",
    	keywords = "adaptive systems;algorithm theory;multiplexing;multiprocessor interconnection networks;",
    	note = "virtual channels;efficiency;time-dependent selection functions;deadlock-free adaptive routing algorithms;store-and-forward networks;wormhole networks;design methodologies;adaptive routing algorithm;binary n-cube;message destination;locality;channel bandwidth;channel multiplexing;",
    	number = 1,
    	pages = "25--44",
    	title = "Improving the efficiency of virtual channels with time-dependent selection functions",
    	volume = 13,
    	year = 1994
    }
    
  134. Jose Duato and Pedro Lopez. Performance evaluation of adaptive routing algorithms for k-ary n-cubes. Number 853, pages 45 - 45, 1994. BibTeX

    @inbook{ 1994122484814,
    	author = "Duato, Jose and Lopez, Pedro",
    	address = "Seattle, WA, United States",
    	issn = "0302-9743",
    	series = "Lecture Notes in Computer Science",
    	number = 853,
    	pages = "45--59",
    	title = "Performance evaluation of adaptive routing algorithms for k-ary n-cubes",
    	year = 1994
    }
    
  135. Jose Duato and Pedro Lopez. Performance evaluation of adaptive routing algorithms for k-ary n-cubes. 1994, 45 - 59. BibTeX

    @inproceedings{ 4897362,
    	author = "Duato, Jose and Lopez, Pedro",
    	abstract = "Deadlock avoidance is a key issue in wormhole networks. A first approach consists in removing the cyclic dependencies between channels. Although the absence of cyclic dependencies is a necessary and sufficient condition for deadlock-free deterministic routing, it is only a sufficient condition for deadlock-free adaptive routing. A more powerful approach only requires the absence of cyclic dependencies on a connected channel subset. Moreover, we proposed a necessary and sufficient condition for deadlock-free adaptive routing previously (1994). In this paper, we design adaptive routing algorithms for k-ary n-cubes. In particular, we propose partially adaptive and fully adaptive routing algorithms which considerably increase the throughput achieved by the deterministic routing algorithm. Also, we evaluate the performance of the new routing algorithms under both, uniform and non-uniform distribution of message destinations",
    	address = "Berlin, Germany",
    	booktitle = "Parallel Computer Routing and Communication. First International Workshop, PCRCW '94. Proceedings",
    	keywords = "concurrency control;multiprocessor interconnection networks;performance evaluation;telecommunication network routing;",
    	note = "performance evaluation;adaptive routing algorithms;k-ary n-cubes;deadlock avoidance;wormhole networks;cyclic dependencies;necessary and sufficient condition;connected channel subset;deterministic routing algorithm;routing algorithms;",
    	pages = "45--59",
    	title = "Performance evaluation of adaptive routing algorithms for k-ary n-cubes",
    	year = 1994
    }
    
  136. Jose Duato, B V Dao, P T Gaughan and S Yalamanchili. Scouting: fully adaptive, deadlock-free routing in faulty pipelined networks. 1994, 608 - 13. URL BibTeX

    @inproceedings{ 4864749,
    	author = "Duato, Jose and Dao, B. V. and Gaughan, P. T. and Yalamanchili, S.",
    	abstract = "Adaptive routing protocols based on message pipelining using wormhole routing (WR) can provide superior performance. However, the occurrence of faults can lead to situations that may produce deadlock. Variants of adaptive WR have been introduced (P.T. Gaughan and S. Yalamanchili, 1992) that employ backtracking and misrouting to first establish a path, followed by message pipelining (pipelined circuit switching, or PCS). This scheme avoids deadlock due to faults, but is overly conservative leading to reduced performance. The paper introduces a new family of flow control mechanisms ranging from WR to PCS that offers a compromise by only decoupling the routing probe and the data flits the minimal extent required to provide deadlock-free routing in the presence of faults",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings 1994 International Conference on Parallel and Distributed Systems (Cat. No.94TH06817)",
    	keywords = "adaptive systems;concurrency control;fault tolerant computing;multiprocessor interconnection networks;network routing;parallel architectures;pipeline processing;reliability;",
    	note = "deadlock-free routing;faulty pipelined networks;scouting;adaptive routing protocols;message pipelining;wormhole routing;adaptive WR;pipelined circuit switching;PCS;flow control mechanisms;routing probe;minimal extent;fault tolerant routing;",
    	pages = "608--613",
    	title = "Scouting: fully adaptive, deadlock-free routing in faulty pipelined networks",
    	doi = "10.1109/ICPADS.1994.590406",
    	url = "http://dx.doi.org/10.1109/ICPADS.1994.590406",
    	year = 1994
    }
    
  137. Jose Duato, B V Dao, P T Gaughan and S Yalamanchili. Scouting: fully adaptive, deadlock-free routing in faulty pipelined networks. 1994, 608 - 613. BibTeX

    @inproceedings{ 1995282705474,
    	author = "Duato, Jose and Dao, B. V. and Gaughan, P. T. and Yalamanchili, S.",
    	abstract = "Adaptive routing protocols based on message pipelining using wormhole routing (WR) can provide superior performance. However, the occurrence of faults can lead to situations that may produce deadlock. Variants of adaptive WR have been introduced[10] that employ backtracking and misrouting to first establish a path, followed by message pipelining (pipelined circuit switching, or PCS). This scheme avoids deadlocks due to faults, but is overly conservative leading to reduced performance. This paper introduces a new family of flow control mechanisms ranging from WR to PCS that offers a compromise by only decoupling the routing probe and the data flits the minimal extent required to provide deadlock-free routing in the presence of faults.",
    	address = "Hsinchu, China",
    	booktitle = "Proceedings of the International Conference on Parallel and Distributed Systems - ICPADS",
    	key = "Network protocols",
    	keywords = "Computer networks;Computer system recovery;Fault tolerant computer systems;Performance;Pipeline processing systems;",
    	note = "Deadlock free routing;Faulty pipelined networks;Message pipelining;Pipelined circuit switching;Scouting;Wormhole routing;",
    	pages = "608--613",
    	title = "Scouting: fully adaptive, deadlock-free routing in faulty pipelined networks",
    	year = 1994
    }
    
  138. Jose Duato. Theory of fault-tolerant routing in wormhole networks. 1994, 600 - 607. BibTeX

    @inproceedings{ 1995282705473,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyzes the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level. We propose a sufficient condition for channel redundancy, also computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies its value. This theory is developed on top of our necessary and sufficient condition for deadlock-free adaptive routing. Finally, a fault-tolerant routing algorithm for n-dimensional meshes is proposed.",
    	address = "Hsinchu, China",
    	booktitle = "Proceedings of the International Conference on Parallel and Distributed Systems - ICPADS",
    	key = "Fault tolerant computer systems",
    	keywords = "Algorithms;Communication channels (information theory);Computation theory;Computer networks;Computer system recovery;Data communication systems;Interconnection networks;Multiprocessing systems;Redundancy;Reliability;Theorem proving;",
    	note = "Deadlock free adaptive routing;Fault tolerant routing;Message passing mechanism;Virtual channels;Wormhole networks;",
    	pages = "600--607",
    	title = "Theory of fault-tolerant routing in wormhole networks",
    	year = 1994
    }
    
  139. Jose Duato. Theory to increase the effective redundancy in wormhole networks. Parallel Processing Letters 4(1-2):125 - 138, 1994. BibTeX

    @article{ 1995042477806,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyses the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level, giving a sufficient condition for a channel to be redundant and computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies a lower bound for it. Finally, a fault-tolerant routing algorithm based on the former theory is proposed.",
    	address = "Singapore, Singapore",
    	issn = "0129-6264",
    	journal = "Parallel Processing Letters",
    	key = "Fault tolerant computer systems",
    	keywords = "Algorithms;Communication channels (information theory);Computational methods;Computer system recovery;Data communication systems;Error analysis;Large scale systems;Multiprocessing systems;Program processors;Redundancy;",
    	note = "Adaptive routing;Deadlock avoidance;Wormhole routing;",
    	number = "1-2",
    	pages = "125--138",
    	title = "Theory to increase the effective redundancy in wormhole networks",
    	volume = 4,
    	year = 1994
    }
    
  140. Jose Duato. Theory to increase the effective redundancy in wormhole networks. Parallel Processing Letters 4(1-2):125 - 138, 1994. BibTeX

    @article{ 1995132546653,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyses the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level, giving a sufficient condition for a channel to be redundant and computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies a lower bound for it. Finally, a fault-tolerant routing algorithm based on the former theory is proposed.",
    	issn = "0129-6264",
    	journal = "Parallel Processing Letters",
    	key = "Fault tolerant computer systems",
    	keywords = "Adaptive systems;Algorithms;Computer networks;Computer system recovery;Multiprocessing systems;Redundancy;Reliability;System theory;",
    	note = "Adaptive routing;Deadlock avoidance;Fault tolerance;Multicomputers;Wormhole networks;",
    	number = "1-2",
    	pages = "125--138",
    	title = "Theory to increase the effective redundancy in wormhole networks",
    	volume = 4,
    	year = 1994
    }
    
  141. Jose Duato. A new theory of deadlock-free adaptive multicast routing in wormhole networks. 1993, 64 - 71. URL BibTeX

    @inproceedings{ 4945959,
    	author = "Duato, Jose",
    	abstract = "A theory for the design of deadlock-free adaptive routing algorithms for wormhole networks has been proposed previously. This theory supplies the sufficient conditions for an adaptive routing algorithm to be deadlock-free, even when there are cyclic dependencies between channels. Also, two design methodologies have been proposed. Multicast communication refers to the delivery of the same message from one source node to an arbitrary number of destination nodes. Two multicast wormhole routing methods have been presented previously for multicomputers with 2D-mesh and hypercube topologies. This paper develops the theoretical background for the design of deadlock-free adaptive multicast routing algorithms for wormhole networks. Some basic definitions and two theorems are proposed, developing conditions to verify that an adaptive multicast routing algorithm is deadlock-free, even when there are cyclic dependencies between channels. As an example, the multicast routing algorithms presented previously are extended, so that they can take advantage of the alternative paths offered by the network",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Fifth IEEE Symposium on Parallel and Distributed Processing (Cat. No.93TH0584-3)",
    	keywords = "hypercube networks;message passing;network routing;",
    	note = "deadlock-free adaptive multicast routing;wormhole networks;deadlock-free adaptive routing algorithms;cyclic dependencies;destination nodes;multicomputers;2D-mesh;hypercube topologies;",
    	pages = "64--71",
    	title = "A new theory of deadlock-free adaptive multicast routing in wormhole networks",
    	doi = "10.1109/SPDP.1993.395549",
    	url = "http://dx.doi.org/10.1109/SPDP.1993.395549",
    	year = 1993
    }
    
  142. Jose Duato. A theory to increase the effective redundancy in wormhole networks. 1993, 277 - 88. BibTeX

    @inproceedings{ 4616928,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyses the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level, giving a sufficient condition for a channel to be redundant and computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies a lower bound for it. Finally, a fault tolerant routing algorithm based on the former theory is proposed",
    	address = "Netherlands",
    	issn = "0926-5473",
    	booktitle = "IFIP Transactions A (Computer Science and Technology)",
    	keywords = "computer networks;distributed memory systems;fault tolerant computing;message passing;multiprocessor interconnection networks;redundancy;",
    	note = "redundancy;wormhole networks;fault-tolerant systems;multicomputers;interconnection network;message-passing;reliability;connectivity;deadlock freedom;channel level;",
    	pages = "277--288",
    	title = "A theory to increase the effective redundancy in wormhole networks",
    	volume = "A-39",
    	year = 1993
    }
    
  143. Pedro Lopez and Jose Duato. Deadlock-free adaptive routing algorithms for the 3D-torus: limitations and solutions. 1993, 684 - 7. BibTeX

    @inproceedings{ 4585304,
    	author = "Lopez, Pedro and Duato, Jose",
    	abstract = "A deadlock-free adaptive routing algorithm, obtained from the application of the theory proposed by J. Duato (1991) to the 3D-torus, is evaluated under different load conditions and compared with other algorithms. The results show that this algorithm is very fast, also increasing the network throughput considerably. Nevertheless, this adaptive algorithm has cycles in its channel dependency graph. As a consequence, when the network is heavily loaded messages may temporarily block cyclically, drastically reducing the performance of the algorithm. Two mechanisms are proposed to avoid this problem",
    	address = "Berlin, Germany",
    	booktitle = "PARLE '93 Parallel Architectures and Languages Europe. 5th International PARLE Conference Proceedings",
    	keywords = "multiprocessor interconnection networks;performance evaluation;",
    	note = "deadlock-free adaptive routing algorithms;3D-torus;network throughput;channel dependency graph;",
    	pages = "684--687",
    	title = "Deadlock-free adaptive routing algorithms for the {3D}-torus: limitations and solutions",
    	year = 1993
    }
    
  144. J M Garcia and Jose Duato. Dynamic reconfiguration of multicomputer networks: limitations and tradeoffs. 1993, 317 - 23. URL BibTeX

    @inproceedings{ 4658021,
    	author = "Garcia, J. M. and Duato, Jose",
    	abstract = "The dynamic reconfiguration of the interconnection network is an advanced feature of some multicomputers to reduce the communication overhead. Up to now, the work carried out in this field has focused on static switching, i.e., the network changes its topology before starting the execution of a phase of an application program and then it remains constant throughout the phase execution. However, the authors' work focuses on true dynamic reconfiguration, i.e., the network topology can change almost arbitrarily at runtime. In a previous paper (see Garcia and Duato, 1991), they presented an algorithm to handle the dynamic reconfiguration and some simulation results, showing the benefits achieved by this reconfiguration algorithm. In this paper, they expound in depth the reconfiguration algorithm and the different concepts related to it. The previous work is analyzed and compared with their algorithm, showing the improvements achieved",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings. Euromicro Workshop on Parallel and Distributed Processing",
    	keywords = "multiprocessor interconnection networks;network topology;parallel algorithms;reconfigurable architectures;",
    	note = "dynamic reconfiguration;multicomputer networks;interconnection network;communication overhead;network topology;reconfiguration algorithm;application program;runtime;",
    	pages = "317--323",
    	title = "Dynamic reconfiguration of multicomputer networks: limitations and tradeoffs",
    	doi = "10.1109/EMPDP.1993.336386",
    	url = "http://dx.doi.org/10.1109/EMPDP.1993.336386",
    	year = 1993
    }
    
  145. Z Liu, Jose Duato and L -E Thorelli. Grouping virtual channels for deadlock-free adaptive wormhole routing. 1993, 254 - 65. BibTeX

    @inproceedings{ 4607908,
    	author = "Liu, Z. and Duato, Jose and Thorelli, L.-E.",
    	abstract = "Recently, intensive research has been done to develop adaptive deadlock-free wormhole routing strategies for interconnection networks. One effective method is to partition the physical network into several virtual networks such that there is no channel dependency cycle in each of them even if full or partial adaptive routing strategies are used. However, each physical channel can be split into more virtual channels than the number necessary to set up the virtual networks. The additional virtual channels can be considered as one resource pool for all virtual networks. It means the packet which is blocked in one virtual network can borrow one free valid virtual channel from the resource pool, returning it to the resource pool when it is released. The authors call this scheme the grouping technique and have applied it to double-y adaptive routing on a 2D mesh network, producing a new fully adaptive routing algorithm called group-double-y. The simulation results show that with heavily loaded network it can double/(increase 26\%) the average physical channel utilization under uniform/matrix-transpose traffic pattern. They have also applied the grouping technique in the Turn model on a 2D mesh network, producing a fully adaptive, minimum and nonminimum routing algorithm called group-turn-model. Compared with group-double-y, the simulation results show that with heavily loaded network the group-turn-model increases/decreases the average physical channel utilization by (12\%)/(2\%) under matrix-transpose/uniform traffic pattern",
    	address = "Berlin, Germany",
    	booktitle = "PARLE '93 Parallel Architectures and Languages Europe. 5th International PARLE Conference Proceedings",
    	keywords = "digital simulation;multiprocessor interconnection networks;",
    	note = "virtual channels grouping;deadlock-free adaptive wormhole routing;interconnection networks;virtual networks;grouping technique;2D mesh network;simulation results;Turn model;nonminimum routing algorithm;group-turn-model;group-double-y;average physical channel utilization;",
    	pages = "254--265",
    	title = "Grouping virtual channels for deadlock-free adaptive wormhole routing",
    	year = 1993
    }
    
  146. Jose Duato. New theory of deadlock-free adaptive multicast routing in wormhole networks. 1993, 64 - 71. BibTeX

    @inproceedings{ 1994041213869,
    	author = "Duato, Jose",
    	abstract = "A theory for the design of deadlock-free adaptive routing algorithms for wormhole networks has been proposed in [11, 14]. This theory supplies the sufficient conditions for an adaptive routing algorithm to be deadlock-free, even when there are cyclic dependencies between channels. Also, two design methodologies have been proposed. Multicast communication refers to the delivery of the same message from one source node to an arbitrary number of destination nodes. Two multicast wormhole routing methods have been presented in [22] for multicomputers with 2D-mesh and hypercube topologies. This paper develops the theoretical background for the design of deadlock-free adaptive multicast routing algorithms for wormhole networks. Some basic definitions and two theorems are proposed, developing conditions to verify that an adaptive multicast routing algorithm is deadlock-free, even when there are cyclic dependencies between channels. As an example, the multicast routing algorithms presented in [22] are extended, so that they can take advantage of the alternative paths offered by the network.",
    	address = "Dallas, TX, USA",
    	booktitle = "Proceedings of the 5th IEEE Symposium on Parallel and Distributed Processing",
    	key = "Multiprocessing systems",
    	keywords = "Adaptive systems;Algorithms;Computer networks;Computer system recovery;Data communication systems;Data processing;Electric network topology;Program processors;Storage allocation (computer);",
    	note = "Cyclic dependencies;Dally's theorem;Deadlock free adaptive multicast routing;Hypercube topologies;Multicast communications;Wormhole networks;Wormhole routing;",
    	pages = "64--71",
    	title = "New theory of deadlock-free adaptive multicast routing in wormhole networks",
    	year = 1993
    }
    
  147. Jose Duato. New theory of deadlock-free adaptive routing in wormhole networks. IEEE Transactions on Parallel and Distributed Systems 4(12):1320 - 1331, 1993. URL BibTeX

    @article{ 1994041229381,
    	author = "Duato, Jose",
    	abstract = "Second generation multicomputers use wormhole routing, allowing a very low channel setup time and drastically reducing the dependency between network latency and internode distance. Deadlock-free routing strategies have been developed, allowing the implementation of fast hardware routers that reduce the communication bottleneck. Also, adaptive routing algorithms with deadlock-avoidance or deadlock-recovery techniques have been proposed for some topologies, being very effective and outperforming static strategies. This paper develops the theoretical background for the design of deadlock-free adaptive routing algorithms for wormhole networks. Some basic definitions and two theorems are proposed, developing conditions to verify that an adaptive algorithm is deadlock-free, even when there are cycles in the channel dependency graph. Also, two design methodologies are proposed. The first one supplies algorithms with a high degree of freedom, without increasing the number of physical channels. The second methodology is intended for the design of fault-tolerant algorithms. Some examples are given, showing the application of the methodologies. Finally, some simulations show the performance improvement that can be achieved by designing the routing algorithms with the new theory.",
    	issn = "1045-9219",
    	journal = "IEEE Transactions on Parallel and Distributed Systems",
    	key = "Computer networks",
    	keywords = "Algorithms;Computer system recovery;Data communication systems;Data processing;Electric network topology;Fault tolerant computer systems;Graph theory;Multiprocessing systems;Virtual storage;",
    	note = "Adaptive routing;Deadlock avoidance;Design methodologies;Fault tolerance;Multicomputers;Virtual channels;Wormhole routing;",
    	number = 12,
    	pages = "1320--1331",
    	title = "New theory of deadlock-free adaptive routing in wormhole networks",
    	doi = "10.1109/71.250114",
    	url = "http://dx.doi.org/10.1109/71.250114",
    	volume = 4,
    	year = 1993
    }
    
  148. Jose Duato. On the design of deadlock-free adaptive multicast routing algorithms. Parallel Processing Letters 3(4):321 - 33, 1993. BibTeX

    @article{ 4704978,
    	author = "Duato, Jose",
    	abstract = "Multicast communication refers to the delivery of the same message from one source node to an arbitrary number of destination nodes. Two multicast wormhole routing methods have been presented previously by X. Lin and L.M. Ni (1991) for multicomputers with 2D-mesh and hypercube topologies. Also, a theory for the design of deadlock-free adaptive routing algorithms for wormhole networks has been proposed previously by J. Duato ( 1991, 1993 ). This theory supplies the sufficient conditions for an adaptive routing algorithm to be deadlock-free, even when there are cyclic dependencies between channels. This paper analyses the additional channel dependencies produced by multicast routing algorithms on wormhole networks. Then, the theory proposed previously is extended by considering them. As an example, the multicast routing algorithms are extended, taking advantage of the alternative paths offered by the network",
    	address = "Singapore",
    	issn = "0129-6264",
    	journal = "Parallel Processing Letters",
    	keywords = "concurrency control;hypercube networks;system recovery;",
    	note = "deadlock-free adaptive multicast routing algorithms;multicast communication;2D-mesh;hypercube topologies;wormhole networks;sufficient conditions;cyclic dependencies;",
    	number = 4,
    	pages = "321--333",
    	title = "On the design of deadlock-free adaptive multicast routing algorithms",
    	volume = 3,
    	year = 1993
    }
    
  149. Jose Duato. Theory to increase the effective redundancy in wormhole networks. Number A-39, pages 277 - 288, 1993. BibTeX

    @conference{ 1994111410611,
    	author = "Duato, Jose",
    	abstract = "Fault-tolerant systems aim at providing continuous operations in the presence of faults. Multicomputers rely on an interconnection network between processors to support the message-passing mechanism. Therefore, the reliability of the interconnection network is very important for the reliability of the whole system. This paper analyses the effective redundancy available in a wormhole network by combining connectivity and deadlock freedom. Redundancy is defined at the channel level, giving a sufficient condition for a channel to be redundant and computing the set of redundant channels. The redundancy level of the network is also defined, proposing a theorem that supplies a lower bound for it. Finally, a fault-tolerant routing algorithm based on the former theory is proposed.",
    	address = "Palma de Mallorca, Spain",
    	booktitle = "IFIP Transactions A: Computer Science and Technology",
    	issn = "0926-5473",
    	key = "Multiprocessing systems",
    	keywords = "Algorithms;Communication channels;Computer networks;Computer system recovery;Critical path analysis;Data communication systems;Fault tolerant computer systems;Graph theory;Redundancy;Reliability;Theorem proving;",
    	note = "Connectivity;Deadlock freedom;Routing;Wormhole networks;",
    	number = "A-39",
    	pages = "277--288",
    	title = "Theory to increase the effective redundancy in wormhole networks",
    	year = 1993
    }
    
  150. Jose Duato. Theory to increase the effective redundancy in wormhole networks. 1993, 277 - 277. BibTeX

    @conference{ 1995072767955,
    	author = "Duato, Jose",
    	address = "Palma de Mallorca, Spain",
    	booktitle = "IFIP Transactions A: Computer Science and Technology",
    	pages = "277",
    	title = "Theory to increase the effective redundancy in wormhole networks",
    	year = 1993
    }
    
  151. J M Garcia and Jose Duato. An advanced environment for programming transputer networks with dynamic reconfiguration. 1992, 601 - 10. BibTeX

    @conference{ 4513529,
    	author = "Garcia, J. M. and Duato, Jose",
    	abstract = "The authors present a programming environment for multicomputers. Among other features, it allows them to evaluate the performance of parallel algorithms running on a multicomputer with both, static and dynamically reconfigurable topologies. The results of this evaluation are obtained by simulating a machine model based on a transputer network. Their environment-called FDP-permits the simulation of the behaviour of a multicomputer. Several machine parameters can be adjusted. For example, the authors can vary the network topology, the number of nodes, the routing algorithm in the network, etc. Choosing different options is easy, because FDP has a friendly user interface. In their environment, a parallel algorithm is programmed in the distributed Pascal language. This new parallel language, which they have developed, is based on standard Pascal. Some extensions allow an easy and elegant programming of parallel algorithms, consisting of processes which communicate by means of message-passing",
    	address = "Barcelona, Spain",
    	booktitle = "Parallel Computing and Transputer Applications",
    	keywords = "message passing;parallel algorithms;parallel processing;Pascal;programming environments;user interfaces;",
    	note = "performance evaluation;static topologies;advanced environment;programming transputer networks;dynamic reconfiguration;programming environment;parallel algorithms;dynamically reconfigurable topologies;machine model;transputer network;routing algorithm;user interface;distributed Pascal language;message-passing;",
    	pages = "601--610",
    	title = "An advanced environment for programming transputer networks with dynamic reconfiguration",
    	year = 1992
    }
    
  152. Jose Duato. Channel classes: a new concept for deadlock avoidance in wormhole networks. Parallel Processing Letters 2(4):347 - 54, 1992. BibTeX

    @article{ 4573086,
    	author = "Duato, Jose",
    	abstract = "The author has developed the theoretical background for the design of deadlock-free adaptive routing algorithms for store-and-forward and wormhole networks. Some definitions and theorems have been proposed, developing conditions to verify that an adaptive algorithm is deadlock-free, even when they are cyclic dependencies between channels. Also, two design methodologies have been proposed. She proposes a partial order between channels as well as an equivalence relation. This relation splits the set of channels into equivalence classes. Then, she extends her previous theory by considering equivalence classes (channel classes) instead of channels. This extension drastically simplifies the verification of deadlock freedom for adaptive routing algorithms with cyclic dependencies between channels. Finally, she presents an example",
    	address = "Singapore",
    	issn = "0129-6264",
    	journal = "Parallel Processing Letters",
    	keywords = "concurrency control;message passing;multiprocessor interconnection networks;",
    	note = "channel classes;deadlock avoidance;wormhole networks;deadlock-free adaptive routing algorithms;store-and-forward;adaptive algorithm;cyclic dependencies;partial order;equivalence relation;verification;",
    	number = 4,
    	pages = "347--354",
    	title = "Channel classes: a new concept for deadlock avoidance in wormhole networks",
    	volume = 2,
    	year = 1992
    }
    
  153. Jose Duato. Impact of locality on the performance of some adaptive routing algorithms for the hypercube. Proceedings of the European Workshops on Parallel Computing, pages 123 - 123, 1992. BibTeX

    @conference{ 1993041545718,
    	author = "Duato, Jose",
    	address = "Barcelona, Spain",
    	booktitle = "Proceedings of the European Workshops on Parallel Computing",
    	pages = "123",
    	title = "Impact of locality on the performance of some adaptive routing algorithms for the hypercube",
    	year = 1992
    }
    
  154. Jose Duato. Impact of locality on the performance of some adaptive routing algorithms for the hypercube. 1992, 123 - 6. BibTeX

    @conference{ 4391376,
    	author = "Duato, Jose",
    	abstract = "In previous papers, a new theory for the design of deadlock-free adaptive routing algorithms for wormhole and store-and-forward routing as well as two design methodologies have been proposed. Also, a new adaptive routing algorithm, obtained from the application of the former theory to the binary n-cube, has been evaluated using a uniform message distribution. The current paper analyses the effect of locality using a decreasing probability distribution for message destination. For that distribution, the results show that adaptive algorithms outperform static ones, except for very small networks with little traffic",
    	address = "Amsterdam, Netherlands",
    	booktitle = "Parallel Computing: From Theory to Sound Practice. Proceedings of EWPC '92, the European Workshops on Parallel Computing",
    	keywords = "hypercube networks;parallel algorithms;performance evaluation;",
    	note = "wormhole routing;adaptive routing algorithms;hypercube;deadlock-free;store-and-forward routing;message distribution;locality;decreasing probability distribution;message destination;",
    	pages = "123--126",
    	title = "Impact of locality on the performance of some adaptive routing algorithms for the hypercube",
    	year = 1992
    }
    
  155. Jose Duato. Improving the efficiency of virtual channels with time-dependent selection functions. 1992, 635 - 50. BibTeX

    @conference{ 4325039,
    	author = "Duato, Jose",
    	abstract = "In previous papers by the author (1991, 1992), a new theory for the design of deadlock-free adaptive routing algorithms for wormhole and store-and-forward routing as well as two design methodologies have been proposed. Also, a new adaptive routing algorithm, obtained from the application of the former theory to the binary n-cube, has been evaluated using both, a uniform and an exponential distribution for message destination. The results are good, especially for large networks and a uniform distribution for message destination. When locality is exploited, the results are comparatively worse, mainly due to the reduction in channel bandwidth produced by channel multiplexing. In this paper, the author analyses the advantages and disadvantages produced by the use of virtual channels, proposing a new approach to maximize their efficiency. This approach uses time-dependent selection functions, associating a threshold to some virtual channels. Those channels cannot be selected by a message unless it is waiting for longer than the corresponding threshold. The evaluation of the new selection function for the binary n-cube shows an important improvement, especially when locality is exploited",
    	address = "Berlin, Germany",
    	booktitle = "PARLE '92. Parallel Architectures and Languages Europe. 4th International PARLE Conference. Proceedings",
    	keywords = "message passing;multiprocessor interconnection networks;",
    	note = "wormhole routing;virtual channels;time-dependent selection functions;deadlock-free adaptive routing algorithms;store-and-forward routing;binary n-cube;message destination;channel multiplexing;",
    	pages = "635--650",
    	title = "Improving the efficiency of virtual channels with time-dependent selection functions",
    	year = 1992
    }
    
  156. Antonio Robles and Jose Duato. Multilinks: a new approach to the design of adaptive routing algorithms for multicomputers. 1992, 405 - 10. BibTeX

    @conference{ 4214095,
    	author = "Robles, Antonio and Duato, Jose",
    	abstract = "A new methodology for the design of deadlock-free adaptive routing algorithms is proposed, which is based on the use of multilinks. This is a new concept consisting of a virtual link formed by several adjacent physical channels simultaneously reserved by the router. Through simulation, the paper investigates the performance of two adaptive strategies for wormhole routing based on multilinks, comparing them with static routing. All adaptive routing strategies outperformed static routing significantly. Different network sizes have been evaluated, showing that the relative improvement of adaptive routing with regard to static routing increases with the network size",
    	address = "Amsterdam, Netherlands",
    	booktitle = "Parallel and Distributed Computing in Engineering Systems. Proceedings of the IMACS/IFAC International Symposium",
    	keywords = "multiprocessor interconnection networks;switching theory;",
    	note = "adaptive routing algorithms;multicomputers;deadlock-free adaptive routing algorithms;multilinks;virtual link;physical channels;wormhole routing;network size;",
    	pages = "405--410",
    	title = "Multilinks: a new approach to the design of adaptive routing algorithms for multicomputers",
    	year = 1992
    }
    
  157. J J Serrano, Vicente Santonja, P J Gil and R Ors. Reliability and safety evaluation techniques for components and processes. 1992, 113 - 122. BibTeX

    @conference{ 1993020647258,
    	author = "Serrano, J. J. and Santonja, Vicente and Gil, P. J. and Ors, R.",
    	abstract = "The increasing complexity of industrial control systems and industrial processes makes it necesary to have the tools and techniques for reliability and safety analysis in these systems. In this paper, we survey the most frecuently used techniques in reliability and safety analysis of components and industrial processes. The methods surveyed are Fault Trees, Markov Models and Stochastics Petri nets. We study the characteristics and the main uses of these methods. The usefulness of the revised methods is demonstrated in a model-based faul-detection, comparing a measurement system with redundant sensors and another with analytical redundancy.",
    	address = "Baden-Baden, Germany",
    	booktitle = "IFAC Symposia Series",
    	issn = "0962-9505",
    	key = "Control systems",
    	keywords = "Accident prevention;Components;Evaluation;Failure analysis;Industrial plants;Petri nets;Process control;Redundancy;Reliability;Safety factor;Sensors;",
    	note = "Model based fault detection;Redundant sensors;Safety evaluation;",
    	number = 6,
    	pages = "113--122",
    	title = "Reliability and safety evaluation techniques for components and processes",
    	year = 1992
    }
    
  158. J M Garcia and Jose Duato. An algorithm for dynamic reconfiguration of a multicomputer network. 1991, 848 - 55. URL BibTeX

    @conference{ 4368132,
    	author = "Garcia, J. M. and Duato, Jose",
    	abstract = "The dynamic reconfiguration of the interconnection network is an advanced feature of some multicomputers to reduce the communication overhead. The authors present an algorithm for the dynamic reconfiguration of the network. Reconfiguration is limited, preserving the original topology. Long distance message passing is minimized by positioning communication partners close to each other. The algorithm is transparent to the application programmer and is not restricted to a particular class of applications, being very well suited for parallel applications whose communication pattern varies over time. The paper also presents some simulation results, showing the benefits from the new reconfiguration algorithm",
    	address = "Los Alamitos, CA, USA",
    	booktitle = "Proceedings of the Third IEEE Symposium on Parallel and Distributed Processing (Cat. No.91TH0396-2)",
    	doi = "10.1109/SPDP.1991.218232",
    	keywords = "message passing;multiprocessor interconnection networks;parallel processing;",
    	note = "algorithm;dynamic reconfiguration;multicomputer network;interconnection network;message passing;simulation;",
    	pages = "848--855",
    	title = "An algorithm for dynamic reconfiguration of a multicomputer network",
    	url = "http://dx.doi.org/10.1109/SPDP.1991.218232",
    	year = 1991
    }
    
  159. Jose Duato. Deadlock-free adaptive routing algorithms for multi-computers. Evaluation of a new algorithm. 1991, 840 - 840. URL BibTeX

    @conference{ 1993031511897,
    	author = "Duato, Jose",
    	address = "Dallas, TX, USA",
    	booktitle = "Proceedings of the Third IEEE Symposium on Parallel and Distributed Processing",
    	doi = "10.1109/SPDP.1991.218233",
    	pages = "840",
    	title = "Deadlock-free adaptive routing algorithms for multi-computers. {Evaluation} of a new algorithm",
    	url = "http://dx.doi.org/10.1109/SPDP.1991.218233",
    	year = 1991
    }
    
  160. Jose Duato. Deadlock-free adaptive routing algorithms for multicomputers. Technique et Science Informatiques 10(4):275 - 85, 1991. BibTeX

    @article{ 4002820,
    	author = "Duato, Jose",
    	abstract = "The paper proposes a very simple and powerful methodology to design deadlock-free adaptive routing algorithms for wormhole networks. The routing algorithms obtained from the application of that methodology to 2D and 3D-meshes are evaluated by simulation. As simulations are time consuming and adaptive algorithms are interesting when the network traffic is high, the simulations are restricted to the evaluation of networks of different sizes under worst conditions for medium to high message injection rates",
    	address = "France",
    	issn = "0752-4072",
    	journal = "Technique et Science Informatiques",
    	keywords = "concurrency control;parallel algorithms;",
    	note = "2D meshes;multicomputers;deadlock-free adaptive routing algorithms;wormhole networks;3D-meshes;simulation;worst conditions;message injection rates;",
    	number = 4,
    	pages = "275--285",
    	title = "Deadlock-free adaptive routing algorithms for multicomputers",
    	volume = 10,
    	year = 1991
    }
    
  161. Jose Duato. On the design of deadlock-free adaptive routing algorithms for multicomputers: design methodologies. 1991, 390 - 405. BibTeX

    @conference{ 3967718,
    	author = "Duato, Jose",
    	abstract = "The paper develops the theoretical background for the design of deadlock-free adaptive routing algorithms for wormhole as well as store-and-forward routing. Some basic definitions and four theorems are proposed, developing conditions to verify that an adaptive algorithm is deadlock-free, even when there are cycles in the channel dependency graph. Also, two design methodologies are proposed. The first one supplies algorithms with a high degree of freedom, without increasing the number of physical channels. The second methodology is intended for the design of fault-tolerant algorithms. Some examples are given, showing the application of the methodologies",
    	address = "Berlin, Germany",
    	booktitle = "PARLE '91. Parallel Architectures and Languages Europe. Volume I: Parallel Architectures and Algorithms",
    	keywords = "fault tolerant computing;parallel processing;system recovery;",
    	note = "deadlock-free adaptive routing algorithms;wormhole;store-and-forward routing;adaptive algorithm;deadlock-free;channel dependency graph;fault-tolerant algorithms;",
    	pages = "390--405",
    	title = "On the design of deadlock-free adaptive routing algorithms for multicomputers: design methodologies",
    	year = 1991
    }
    
  162. Jose Duato. On the design of deadlock-free adaptive routing algorithms for multicomputers: theoretical aspects. 1991, 234 - 43. BibTeX

    @conference{ 3928381,
    	author = "Duato, Jose",
    	abstract = "Second generation multicomputers use wormhole routing, drastically reducing the dependency between network latency and internode distance. Deadlock-free routing strategies have been developed, allowing the implementation of fast hardware routers. Also, adaptive routing algorithms with deadlock-avoidance or deadlock-recovery techniques have been proposed for some topologies, being very effective and outperforming static strategies. This paper develops the theoretical aspects for the design of deadlock-free adaptive routing algorithms. Some basic definitions and three theorems are proposed, developing conditions to verify that an adaptive algorithm is deadlock-free, even when there are cycles in the channel dependency graph. As an example, a new adaptive algorithm for 2D-meshes is presented",
    	address = "Berlin, Germany",
    	booktitle = "Distributed Memory Computing. 2nd European Conference, EDMCC2 Proceedings",
    	keywords = "multiprocessor interconnection networks;parallel architectures;switching theory;",
    	note = "message passing;deadlock-free adaptive routing algorithms;multicomputers;wormhole routing;network latency;deadlock-avoidance;channel dependency graph;2D-meshes;",
    	pages = "234--243",
    	title = "On the design of deadlock-free adaptive routing algorithms for multicomputers: theoretical aspects",
    	year = 1991
    }
    
  163. Maria E Gomez, L F Castro, G Bolanos, O Moran and P Prieto. Preparation and properties of high-Tc superconducting Bi(Pb)-Sr-Ca-Cu-O thick films by a melting-quenching-annealing method. 1991, 469 - 469. BibTeX

    @conference{ 1993041559305,
    	author = "Gomez, Maria E. and Castro, L. F. and Bolanos, G. and Moran, O. and Prieto, P.",
    	address = "Cusco, Peru",
    	booktitle = "Proceedings of the Latin American Symposium on Surface Physics",
    	pages = "469",
    	title = "Preparation and properties of high-{Tc} superconducting {Bi(Pb)-Sr-Ca-Cu-O} thick films by a melting-quenching-annealing method",
    	year = 1991
    }
    
  164. P J Gil, J J Serrano, R Ors and Vicente Santonja. Dependability evaluation of watchdog processors. 1990, 89 - 94. BibTeX

    @conference{ 4031086,
    	author = "Gil, P. J. and Serrano, J. J. and Ors, R. and Santonja, Vicente",
    	abstract = "Many control systems need a good safety level, this can be done with the help of a watchdog processor that does not involve a high increase in the system's cost compared with other fault tolerant structures. The paper presents Markov models for studying the safety, reliability and availability of a watchdog processor. To carry out a benefit analysis, the results of the models of the three systems: simplex systems, reconfigurable duplication systems, and standby sparing systems, are compared. The influence of parameters, such as: coverage, permanent faults, fault rate, etc. in all models is considered. For solving the models in the transient mode, a program based on the randomization method is used",
    	address = "Oxford, UK",
    	booktitle = "Safety of Computer Control Systems 1990 (SAFECOMP '90). Safety, Security and Reliability Related Computers for the 1990s. Proceedings of the IFAC/EWICS/SARS Symposium",
    	keywords = "fault tolerant computing;industrial computer control;Markov processes;safety;software reliability;supervisory programs;",
    	note = "dependability evaluation;control systems;safety level;watchdog processor;fault tolerant structures;Markov models;benefit analysis;simplex systems;reconfigurable duplication systems;standby sparing systems;coverage;randomization method;",
    	pages = "89--94",
    	title = "Dependability evaluation of watchdog processors",
    	year = 1990
    }
    
  165. Jose Duato and A Gonzalez. Multicomputer simulator. 1989, 367 - 367. BibTeX

    @conference{ 1991021057214,
    	author = "Duato, Jose and Gonzalez, A.",
    	address = "Rennes, France",
    	booktitle = "Hypercube and Distributed Computers. Proceedings of the First European Workshop",
    	pages = "367",
    	title = "Multicomputer simulator",
    	year = 1989
    }
    
  166. Jose Duato and A Gonzalez. Multicomputer simulator. 1989, 367 - 8. BibTeX

    @conference{ 3602017,
    	author = "Duato, Jose and Gonzalez, A.",
    	abstract = "Presents a multicomputer simulator, specially oriented to the development, debugging and evaluation of parallel numerical algorithms. This simulator implements a model, based on the features of real multicomputers and is not intended to substitute real machines, but to complement them. Additionally, the simulator allows the use of friendly languages, like Modula-2, including the definition of complex data structures and dynamical memory allocation. The model currently implemented is described together with the way to implement parallel algorithms with this simulator. The paper also indicates how to use the simulator and proposes some future developments",
    	address = "Amsterdam, Netherlands",
    	booktitle = "Hypercube and Distributed Computers. Proceedings of the First European Workshop",
    	keywords = "mathematics computing;parallel algorithms;",
    	note = "multicomputer simulator;development;debugging;evaluation;parallel numerical algorithms;friendly languages;Modula-2;data structures;dynamical memory allocation;",
    	pages = "367--368",
    	title = "Multicomputer simulator",
    	year = 1989
    }
    
  167. Jose Duato and J Pons. Parallel triangularization of sparse matrices on distributed memory multiprocessors. 1989, 133 - 133. BibTeX

    @conference{ 1991021057189,
    	author = "Duato, Jose and Pons, J.",
    	address = "Rennes, France",
    	booktitle = "Hypercube and Distributed Computers. Proceedings of the First European Workshop",
    	pages = "133",
    	title = "Parallel triangularization of sparse matrices on distributed memory multiprocessors",
    	year = 1989
    }
    
  168. Jose Duato and J Pons. Parallel triangularization of sparse matrices on distributed memory multiprocessors. 1989, 133 - 46. BibTeX

    @conference{ 3601992,
    	author = "Duato, Jose and Pons, J.",
    	abstract = "Square root free Givens rotations and their suitability for the parallel triangularization of sparse matrices are studied. Also, ways to split the overall problem into several tasks and the distribution of these task among the processors are analysed. A parallel algorithm to implement the triangularization of a sparse matrix on a distributed memory multiprocessor is proposed. This paper also presents the results of the performance evaluation on a multicomputer simulator, showing that very good speedups can be obtained, even with relatively small matrices. The evaluation also shows that a ring supports the algorithm efficiently",
    	address = "Amsterdam, Netherlands",
    	booktitle = "Hypercube and Distributed Computers. Proceedings of the First European Workshop",
    	keywords = "linear algebra;multiprocessing systems;parallel algorithms;performance evaluation;",
    	note = "parallel triangularization;square root free Givens rotation;sparse matrices;distributed memory multiprocessors;parallel algorithm;distributed memory multiprocessor;performance evaluation;multicomputer simulator;",
    	pages = "133--146",
    	title = "Parallel triangularization of sparse matrices on distributed memory multiprocessors",
    	year = 1989
    }
    
  169. R Bru, Jose Duato, A Gonzalez, J Mas and A Urbano. Performance evaluation of a parallel algorithm for inverting dense matrices on distributed memory multiprocessors. 1989, 647 - 50. BibTeX

    @conference{ 3799459,
    	author = "Bru, R. and Duato, Jose and Gonzalez, A. and Mas, J. and Urbano, A.",
    	abstract = "The authors present a parallel algorithm to invert a square dense matrix A, based on the Sherman-Morrison formula. It has been developed for distributed memory multiprocessors, obtaining a high degree of parallelism for matrices with a very large size. They have implemented this algorithm on a simulation tool in order to check its correctness. They also give results about the efficiency and speed-up as a function of some variables (size of A, number of processors, arithmetic and communication times) for three interconnection networks",
    	address = "Los Altos, CA, USA",
    	booktitle = "Proceedings of the Fourth Conference on Hypercubes, Concurrent Computers and Applications",
    	keywords = "hypercube networks;matrix algebra;parallel algorithms;performance evaluation;",
    	note = "performance evaluation;matrix inversion;correctness;checking;parallel algorithm;distributed memory multiprocessors;square dense matrix;Sherman-Morrison formula;parallelism;simulation tool;interconnection networks;",
    	pages = "647--650",
    	title = "Performance evaluation of a parallel algorithm for inverting dense matrices on distributed memory multiprocessors",
    	year = 1989
    }
    
  170. Jose Duato. A network topology for parallel processing on message-passing architectures. 1988, 167 - 73. BibTeX

    @conference{ 3531987,
    	author = "Duato, Jose",
    	abstract = "In order to construct general purpose massively parallel systems, message-passing architectures appear as a trade-off between flexibility and cost. In this kind of systems, the communication among processors relies on an interconnection network. Point-to-point topologies are normally used, each node sending, receiving and routing messages in a distributed manner. Some authors have tried to find a trade-off between diameter and node degree, also maintaining the possibility to design a simple routing algorithm, such as the hypernets or the cube-connected cycles. In this paper, a new topology with a node degree equal to four is defined. Its main feature is a very small diameter, which is only slightly larger than the diameter of a hypercube with the same number of nodes, whatever the network size is. The diameter and the distances between nodes are also presented for different sized networks, comparing the proposed topology with other topologies. Finally, a distributed algorithm to route messages through the network is given",
    	address = "St. Petersburg, FL, USA",
    	booktitle = "ICS 88. Third International Conference on Supercomputing. Proceedings, Supercomputing '88",
    	keywords = "computer architecture;parallel processing;",
    	note = "network topology;parallel processing;message-passing architectures;massively parallel systems;interconnection network;hypercube;distributed algorithm;",
    	pages = "167--173",
    	title = "A network topology for parallel processing on message-passing architectures",
    	year = 1988
    }
    
  171. Jose Duato. A network topology with small diameter and constant node degree. 1988, 107 - 10. BibTeX

    @conference{ 3451387,
    	author = "Duato, Jose",
    	abstract = "In order to construct general purpose massively parallel computers, message-passing architectures appear as a trade-off between flexibility and cost. In these kind of systems, the communication among processors relies on an interconnection network. Point-to-point topologies are normally used, each node sending, receiving and routing message in a distributed manner. In the paper, a topology with a node degree equal to four is defined. Its main feature is a very small diameter, which is only slightly larger than the diameter of a hypercube with the same number of nodes, whatever the network size is. Besides, distributed algorithms to route and broadcast messages through the network are proposed, also giving an Occam implementation",
    	address = "Anaheim, CA, USA",
    	booktitle = "Proceedings of the IASTED International Symposium Applied Informatics - AI '88",
    	keywords = "multiprocessor interconnection networks;",
    	note = "network topology;constant node degree;massively parallel computers;message-passing architectures;interconnection network;small diameter;hypercube;distributed algorithms;route;broadcast;Occam implementation;",
    	pages = "107--110",
    	title = "A network topology with small diameter and constant node degree",
    	year = 1988
    }
    
  172. Jose Duato and J Pons. A new family of network topologies for message-passing architectures. 1988, 111 - 14. BibTeX

    @conference{ 3532077,
    	author = "Duato, Jose and Pons, J.",
    	abstract = "The communication among processors in message-passing architectures relies on an interconnection network. Some authors have tried to find a trade-off between diameter and node degree, also allowing the design of a simple routing algorithm such as the hypernets or the cube-connected cycles. In this paper, a new family of topologies is defined, allowing to choose the diameter and node degree independently. Its main feature is a very small diameter, obtained with a small node degree. The diameter and normalized diameter are presented for different sized networks, comparing the proposed family with other topologies",
    	address = "Amsterdam, Netherlands",
    	booktitle = "Parallel Processing. Proceedings of the IFIP WG 10.3 Working Conference",
    	keywords = "computer architecture;multiprocessor interconnection networks;network topology;",
    	note = "network topologies;message-passing architectures;interconnection network;routing algorithm;hypernets;",
    	pages = "111--114",
    	title = "A new family of network topologies for message-passing architectures",
    	year = 1988
    }
    
  173. Jose Duato. Parallel processing of the square root free Givens rotations by means of a transputer network. 1988, 257 - 64. BibTeX

    @conference{ 3228374,
    	author = "Duato, Jose",
    	abstract = "There are many applications that require the use of orthogonal transformations. Since the cost of microprocessors is decreasing rapidly, they allow the construction of inexpensive multiprocessors. In particular, the transputer from Inmos allows an easy implementation of a powerful multiprocessor, within a great flexibility in the design of the interconnection network topology. Among the orthogonal transformations, the Givens rotations are best suited for a parallel computer because they exhibit a great potential parallelism. Moreover, the square root free Givens rotations are twice as fast as the conventional ones. In this paper, the possibility to split the transformation of a matrix into several tasks and their distribution among the processors are analysed. Besides, a parallel algorithm to implement the fast Givens rotations with a network of transputers is proposed. Also, the topology of the transputer network is proposed. This topology can be implemented with a different number of processors, depending on the processing speed required. Finally, the results of the algorithm simulation on different sized networks are shown",
    	address = "Amsterdam, Netherlands",
    	booktitle = "Parallel Processing and Applications. Proceedings of the International Conference",
    	keywords = "microprocessor chips;parallel algorithms;",
    	note = "parallel processing;square root free Givens rotations;transputer network;orthogonal transformations;Inmos;interconnection network topology;parallel algorithm;algorithm simulation;",
    	pages = "257--264",
    	title = "Parallel processing of the square root free {Givens} rotations by means of a transputer network",
    	year = 1988
    }
    
  174. Jose Duato and P Albertos. Simplified non-linear control of lifts. 1988, 149 - 54. BibTeX

    @conference{ 3147950,
    	author = "Duato, Jose and Albertos, P.",
    	abstract = "A non-linear controller for lift installations is proposed in this paper. After a brief description of the system, the main difficulties that arose during the evaluation of a prototype with a linear regulator are reported, showing the load influence and the non-linearity of the motor. Later, a simplified dynamic model for the lift motor is proposed, analysing two methods for load estimation. Finally, some non-linear functions are included in the regulator, thus improving the system behaviour. Also, the main results obtained with an 85 HP motor are reported",
    	address = "Oxford, UK",
    	booktitle = "Components, Instruments and Techniques for Low Cost Automation and Applications. IFAC Symposium",
    	keywords = "computerised control;controllers;lifts;nonlinear control systems;",
    	note = "nonlinear controller;lifts;load influence;dynamic model;load estimation;regulator;",
    	pages = "149--154",
    	title = "Simplified non-linear control of lifts",
    	year = 1988
    }
    
  175. Jose Duato. Fault-tolerant microprocessor-based control system for multiple lift installations. 1987, 209 - 13. BibTeX

    @conference{ 3067181,
    	author = "Duato, Jose",
    	abstract = "A fault-tolerant control system for multiple lift installations is proposed. The system has a modular structure. It is composed by a system controller and as many motor controllers as lifts. The motor controller supports transient failure recovery through software routines and watchdog timers. Each lift may be assigned to two motor controllers, thus taking advantage of the existence of similar components. The system controller has a redundant architecture and, among other things, governs the system reconfiguration in the case of a failure. The additional cost of true redundancy is less than 1% of the overall installation cost. Also, the main causes of failure are analysed as well as the suitability of the proposed architecture to recover them",
    	address = "Oxford, UK",
    	booktitle = "Microcomputer Application in Process Control. Selected Papers from the IFAC Symposium",
    	keywords = "control systems;induction motors;lifts;microcomputer applications;position control;",
    	note = "induction motors;position control;fault-tolerant microprocessor based control system;multiple lift installations;transient failure recovery;watchdog timers;motor controllers;",
    	pages = "209--213",
    	title = "Fault-tolerant microprocessor-based control system for multiple lift installations",
    	year = 1987
    }
    
  176. Jose Duato. Nonlinear digital control of three-phase asynchronous motors. Automatica e Instrumentacion 21(170):177 - 81, 1987. BibTeX

    @article{ 3015985,
    	author = "Duato, Jose",
    	abstract = "The problem of the speed control of three-phase asynchronous motors driving, for example, elevators (lifts) in cases in which the control may be effected by varying the voltage on the stator is addressed. The article proposes a simplified dynamic model of the motor and its load and analyses simple methods of estimating the actual load. It suggests methods of digital control based upon the use of microprocessors. It discusses the technology in some detail and indicates the nature of results obtained experimentally",
    	address = "Spain",
    	issn = "0213-3113",
    	journal = "Automatica e Instrumentacion",
    	keywords = "computerised control;electric drives;induction motors;lifts;nonlinear control systems;",
    	note = "nonlinear digital control;stator-voltage variation;load estimation;microprocessor-based digital control;three-phase asynchronous motors;speed control;elevators;lifts;simplified dynamic model;",
    	number = 170,
    	pages = "177--181",
    	title = "Nonlinear digital control of three-phase asynchronous motors",
    	volume = 21,
    	year = 1987
    }
    
  177. Jose Duato, P Albertos and J M Valiente. Position and speed control for lift motors based on a micro. Regulacion y Mando Automatico 19(147):155 - 9, 1985. BibTeX

    @article{ 2503597,
    	author = "Duato, Jose and Albertos, P. and Valiente, J. M.",
    	abstract = "Many industrial applications involve the movement of large bodies with a precise end position but which do not require high precision speed control. This article describes the principle operating features of a micro-based control system for lift motors which offers the appropriate desirable features",
    	address = "Spain",
    	issn = "0040-1722",
    	journal = "Regulacion y Mando Automatico",
    	keywords = "computerised control;lifts;position control;velocity control;",
    	note = "speed control;position control;computerised control;lift motors;micro;industrial applications;operating features;",
    	number = 147,
    	pages = "155--159",
    	title = "Position and speed control for lift motors based on a micro",
    	volume = 19,
    	year = 1985
    }
    
  178. P Albertos and Jose Duato. Position control with power induction motors. 1984, 547 - 52. BibTeX

    @conference{ 2318470,
    	author = "Albertos, P. and Duato, Jose",
    	abstract = "Some industrial control applications require large mass displacement with accurate final positioning control, displacement speed being limited by the maximum available motor torque. A microprocessor-based control system is presented. To reach the target position a multimode control is implemented, including acceleration, maximum speed and deceleration controls, as well as final position control. A squirrel cage induction motor is used as actuator, speed being controlled by stator voltage control. The control system has been implemented on a 10 HP induction motor",
    	address = "Oxford, UK",
    	booktitle = "Control in Power Electronics and Electrical Drives. Proceedings of the Third IFAC Symposium",
    	keywords = "computerised control;induction motors;position control;squirrel cage motors;voltage control;",
    	note = "induction motors;industrial control applications;positioning control;microprocessor-based control system;position control;squirrel cage;actuator;stator voltage control;",
    	pages = "547--552",
    	title = "Position control with power induction motors",
    	year = 1984
    }
    
  179. J A de la Puente, A Crespo, Jose Duato and P Albertos. A kernel for high level real-time programming. 1983, 7 - 10. BibTeX

    @conference{ 2237347,
    	author = "de la Puente, J. A. and Crespo, A. and Duato, Jose and Albertos, P.",
    	abstract = "A kernel for the implementation of real-time primitives in high-level languages is presented. The kernel is suited for developing control applications in minicomputers and microcomputers and is portable from one system to another. An application to a pilot-scale process control system has been developed using the kernel",
    	address = "New York, NY, USA",
    	booktitle = "Proceedings of MELECON '83. Mediterranean Electrotechnical Conference",
    	keywords = "high level languages;process computer control;programmed control;real-time systems;",
    	note = "high level real-time programming;kernel;real-time primitives;high-level languages;control applications;minicomputers;microcomputers;pilot-scale process control system;",
    	pages = "7--10",
    	title = "A kernel for high level real-time programming",
    	year = 1983
    }
    
  180. J A de la Puente, A Crespo, Jose Duato and P Albertos. KERNEL FOR HIGH LEVEL REAL-TIME PROGRAMMING. 1983, IEEE Region 8. BibTeX

    @conference{ 1984020034052,
    	author = "de la Puente, J. A. and Crespo, A. and Duato, Jose and Albertos, P.",
    	address = "Athens, Greece",
    	keywords = "COMPUTER PROGRAMMING LANGUAGES",
    	note = "HIGH LEVEL LANGUAGES;KERNEL;MICROCOMPUTERS;MINICOMPUTERS;REAL-TIME PROGRAMMING;SOFTWARE INTERFACES;",
    	organization = "IEEE Region 8",
    	title = "Kernel for High Level Real-Time Programming",
    	volume = 1,
    	year = 1983
    }