@InProceedings{Howlett:2010:AISB,
    author = "Andrew Howlett and Simon Colton and Cameron Browne",
    title = "Evolving Pixel Shaders for the Prototype Video Game
    Subversion",
    booktitle = "The Thirty Sixth Annual Convention of the Society for
    the Study of Artificial Intelligence and Simulation of
    Behaviour (AISB'10)",
    year = "2010",
    address = "De Montfort University, Leicester, UK",
    month = "30th " # mar,
    note = "AI \& Games Symposium",
    keywords = "genetic algorithms, genetic programming, GPU, OpenGL
    GLSL",
    URL = "http://www.doc.ic.ac.uk/~sgc/papers/howlett_aisb10.pdf",
    size = "6 pages",
    abstract = "Pixel shaders can be used to create a variety of
    visual effects in 3D environments, far more efficiently
    than if produced using the standard graphics pipeline.
    For such efficiency reasons, pixel shaders are commonly
    used in video game rendering, to add artistic or other
    visual effects. We investigate the automated creation
    of novel shader programs for rendering scenes in the
    Subversion virtual game world, with a view to providing
    the player with a visually richer and more diverse 3D
    environment. We show how shader programs based on the
    OpenGL shading language may be represented in a
    hierarchical tree form. This representation admits an
    evolutionary approach to shader creation, and we show
    how the application of genetic programming techniques
    can lead to the evolution of new and interesting
    shaders. We harness this for an approach where the user
    supplies details of a fitness function for the overall
    look of the city environment. We experimented with a
    number of different fitness function setups in order to
    produce some preliminary results about this approach.
    While generally successful in the creation of novel and
    visually interesting shading effects with little
    effort, we find some drawbacks to the approach and
    suggest methods for improvement.",
    notes = "Colour as
    RGBA.
    
    http://www.aisb.org.uk/convention/aisb10/AISB2010.html
    
    http://staffwww.dcs.shef.ac.uk/people/D.Romano/AISB10/program.html
    
    www.introversion.co.uk/subversion",
    }
    
@inproceedings{Luong:2010:gecco,
    author = {Luong, Th\'{e} Van and Melab, Nouredine and Talbi, El-Ghazali},
    title = {GPU-based island model for evolutionary algorithms},
    booktitle = {GECCO '10: Proceedings of the 12th annual conference on
    Genetic and evolutionary computation},
    year = {2010},
    editor = {Juergen Branke and Martin Pelikan and Enrique Alba and Dirk
    V. Arnold and Josh Bongard and Anthony Brabazon and Juergen Branke and
    Martin V. Butz and Jeff Clune and Myra Cohen and Kalyanmoy Deb and
    Andries P Engelbrecht and Natalio Krasnogor and Julian F. Miller and
    Michael O'Neill and Kumara Sastry and Dirk Thierens and Jano {van
    Hemert} and Leonardo Vanneschi and Carsten Witt},
    isbn = {978-1-4503-0072-8},
    pages = {1089--1096},
    month = {7-11 July},
    organisation ={SIGEVO},
    address = {Portland, Oregon, USA},
    doi = {doi:10.1145/1830483.1830685},
    publisher = {ACM},
    address = {New York, NY, USA},
    keywords = {GPU}
    }
    
@misc{langdon2010talk,
    author = {William B. Langdon},
    title = {Programming Graphics Cards with CUDA for Genetic Programming},
    howpublished = {Ecole d'été Evolution Artificielle 2010},
    year = "2010",
    month = {14-17 June},
    note = {Invited talk},
    address = {Calais},
    keywords = {genetic algorithms, genetic programming},
    url = {http://sites.google.com/site/ecoleea2010/supports-de-cours-et-tps},
    size = {39 pages}
    )
    
    
@InProceedings{Maitre:2010:EuroGP,
    author = "Ogier Maitre and Pierre Collet and Nicolas Lachiche",
    title = "Fast Evaluation of {GP} Trees on {GPGPU} by Optimizing Hardware Scheduling",
    booktitle = "Proceedings of the 13th European Conference on Genetic Programming, EuroGP 2010",
    year = "2010",
    editor = "Anna Isabel Esparcia-Alcazar and Aniko Ekart and Sara Silva and Stephen Dignum and A. Sima Uyar",
    volume = "6021",
    series = "LNCS",
    pages = "301--312",
    address = "Istanbul",
    month = "7-9 " # apr,
    organisation = "EvoStar",
    publisher = "Springer",
    keywords = "genetic algorithms, genetic programming",
    isbn13 = "978-3-642-12147-0",
    doi = "doi:10.1007/978-3-642-12148-7_26"
    }
    
@InProceedings{KMJBC10,
    author = {F. Krüger and O. Maitre, S and Jimenez and L. Baumes and P. Collet},
    title = {Speedups between x70 and x120 for a generic local search (memetic) algorithm on a single GPGPU chip},
    booktitle = {EvoNum 2010},
    series = "LNCS",
    year = {2010},
    publisher = {Springer},
    volume = "6024",
    url = {http://lsiit-cnrs.unistra.fr/Publications/2010/KMJBC10},
    pages = "501-511"
    }
    
@INPROCEEDINGS{9184,
    author = {Petr Pospíchal and Jiří Jaroš and Josef Schwarz},
    title = {Parallel Genetic Algorithm on the CUDA Architecture},
    pages = {442--451},
    booktitle = {Applications of Evolutionary Computation},
    series = {LNCS 6024},
    year = {2010},
    location = {Berlin Heidelberg, DE},
    publisher = {Springer Verlag},
    ISBN = {978-3-642-12238-5},
    language = {english},
    url = {http://www.fit.vutbr.cz/research/view_pub.php?id=9184}
    }
    
    
    
@INPROCEEDINGS{9253,
    author = {Petr Pospíchal and Josef Schwarz and Jiří Jaroš},
    title = {Parallel Genetic Algorithm Solving 0/1 Knapsack Problem Running on the GPU},
    pages = {64--70},
    booktitle = {16th International Conference on Soft Computing MENDEL 2010},
    year = {2010},
    location = {Brno, CZ},
    publisher = {Brno University of Technology},
    ISBN = {978-80-214-4120-0},
    language = {english},
    url = {http://www.fit.vutbr.cz/research/view_pub.php?id=9253}
    }
    
@inproceedings{chitty_data_2007,
    address = {London, England},
    title = {A data parallel approach to genetic programming using programmable graphics hardware},
    isbn = {978-1-59593-697-4},
    url = {http://portal.acm.org/citation.cfm?id=1276958.1277274},
    doi = {10.1145/1276958.1277274},
    abstract = {In recent years the computing power of graphics cards has increased significantly. Indeed, the growth in the computing power of these graphics cards is now several orders of magnitude greater than the growth in the power of computer processor units. Thus these graphics cards are now beginning to be used by the scientific community aslow cost, high performance computing platforms. Traditional genetic programming is a highly computer intensive algorithm but due to its parallel nature it can be distributed over multiple processors to increase the speed of the algorithm considerably. This is not applicable for single processor architectures but graphics cards provide a mechanism for developing a data parallel implementation of genetic programming. In this paper we will describe the technique of general purpose computing using graphics cards and how to extend this technique to genetic programming. We will demonstrate the improvement in the performance of genetic programming on single processor architectures which can be achieved by harnessing the computing power of these next generation graphics cards.},
    booktitle = {Proceedings of the 9th annual conference on Genetic and evolutionary computation},
    publisher = {{ACM}},
    author = {Darren M. Chitty},
    year = {2007},
    keywords = {data parallelism, genetic programming, graphics cards},
    pages = {1566--1573}
    },
    
@book{poli_field_2008,
    title = {A Field Guide to Genetic Programming},
    isbn = {1409200736, 9781409200734},
    publisher = {Lulu.com},
    author = {Riccardo Poli and W B Langdon and Nicholas Freitag {McPhee}},
    month = mar,
    year = {2008}
    },
    
@incollection{langdon_simd_2008,
    title = {A {SIMD} Interpreter for Genetic Programming on {GPU} Graphics Cards},
    url = {http://dx.doi.org/10.1007/978-3-540-78671-9_7},
    abstract = {{Mackey-Glass} chaotic time series prediction and nuclear protein classification show the feasibility of evaluating genetic
    programming populations directly on parallel consumer gaming graphics processing units. Using a Linux {KDE} computer equipped with an {nVidia} {GeForce} 8800 {GTX}
    graphics processing unit card the C++ {SPMD} interpretter evolves programs at Giga {GP} operations per second (895 million {GPops).}
    We use the {RapidMind} general processing on {GPU} {(GPGPU)} framework to evaluate an entire population of a quarter of a million
    individual programs on a non-trivial problem in 4 seconds. An efficient reverse polish notation {(RPN)} tree based {GP} is given.},
    booktitle = {Genetic Programming},
    author = {W. Langdon and Wolfgang Banzhaf},
    year = {2008},
    pages = {73--85}
    },
    
@incollection{banzhaf_accelerating_2009,
    title = {Accelerating Genetic Programming through Graphics Processing Units.},
    url = {http://dx.doi.org/10.1007/978-0-387-87623-8_15},
    booktitle = {Genetic Programming Theory and Practice {VI}},
    author = {Wolfgang Banzhaf and Simon Harding and William B. Langdon and Garnett Wilson},
    year = {2009},
    pages = {1--19}
    },
    
@inproceedings{jian-ming_li_efficient_2007,
    title = {An Efficient Fine-grained Parallel Genetic Algorithm Based on {GPU-Accelerated}},
    doi = {{10.1109/NPC.2007.108}},
    abstract = {Fine-grained parallel genetic algorithm {(FGPGA),} though a popular and robust strategy for solving complicated optimization problems, is sometimes inconvenient to use as its population size is restricted by heavy data communication and the parallel computers are relatively difficult to use, manage, maintain and may not be accessible to most researchers. In this paper, we propose a {FGPGA} method based on {GPU-acceleration,} which maps parallel {GA} algorithm to texture-rendering on consumer-level graphics cards. The analytical results demonstrate that the proposed method increases the population size, speeds up its execution and provides ordinary users with a feasible {FGPGA} solution.},
    booktitle = {Network and Parallel Computing Workshops, 2007. {NPC} Workshops. {IFIP} International Conference on},
    author = {{Jian-Ming} Li and {Xiao-Jing} Wang and {Rong-Sheng} He and {Zhong-Xian} Chi},
    year = {2007},
    keywords = {consumer-level graphics cards, data communication, efficient fine-grained parallel genetic algorithm, genetic algorithms, {GPU-acceleration,} optimization problems, parallel computers, rendering (computer graphics), texture-rendering},
    pages = {855--862}
    },
    
@inproceedings{changhao_jiang_automatic_2005,
    title = {Automatic tuning matrix multiplication performance on graphics hardware},
    isbn = {{1089-795X}},
    doi = {{10.1109/PACT.2005.10}},
    abstract = {In order to utilize the tremendous computing power of graphics hardware and to automatically adapt to the fast and frequent changes in its architecture and performance characteristics, this paper implements an automatic tuning system to generate high-performance matrix-multiplication implementation on graphics hardware. The automatic tuning system uses a parameterized code generator to generate multiple versions of matrix multiplication, whose performances are empirically evaluated by actual execution on the target platform. An ad-hoc search engine is employed to search over the implementation space for the version that yields the best performance. In contrast to similar systems on {CPUs,} which utilize cache blocking, register tiling, instruction scheduling tuning strategies, this paper identifies and exploits several tuning strategies that are unique for graphics hardware. These tuning strategies include optimizing for multiple-render-targets, {SIMD} instructions with data packing, overcoming limitations on instruction count and dynamic branch instruction. The generated implementations have comparable performance with expert manually tuned version in spite of the significant overhead incurred due to the use of the high-level {BrookGPU} language.},
    booktitle = {Parallel Architectures and Compilation Techniques, 2005. {PACT} 2005. 14th International Conference on},
    author = {Changhao Jiang and M. Snir},
    year = {2005},
    keywords = {ad-hoc search engine, automatic tuning matrix multiplication, cache blocking, computer graphic equipment, coprocessors, data packing, dynamic branch instruction, graphics hardware, instruction scheduling tuning, matrix multiplication, multiple-render-targets, parameterized code generator, register tiling, {SIMD} instructions},
    pages = {185--194}
    },
    
@inproceedings{miller_cartesian_2008,
    address = {Atlanta, {GA,} {USA}},
    title = {Cartesian genetic programming},
    isbn = {978-1-60558-131-6},
    url = {http://portal.acm.org/citation.cfm?id=1388969.1389075},
    doi = {10.1145/1388969.1389075},
    abstract = {Note: {OCR} errors may be found in this Reference List extracted from the full text article. {ACM} has opted to expose the complete List rather than only correct and linked references.},
    booktitle = {Proceedings of the 2008 {GECCO} conference companion on Genetic and evolutionary computation},
    publisher = {{ACM}},
    author = {Julian Francis Miller and Simon L. Harding},
    year = {2008},
    keywords = {genetic programming},
    pages = {2701--2726}
    },
    
@book{alba_cellular_2008,
    title = {Cellular genetic algorithms},
    isbn = {0387776095, 9780387776095},
    publisher = {Springer},
    author = {Enrique Alba and Bernabe Dorronsoro},
    year = {2008}
    },
    
@inproceedings{zhongwen_luo_cellular_2006,
    title = {Cellular Genetic Algorithms and Local Search for {3-SAT} problem on Graphic Hardware},
    doi = {{10.1109/CEC.2006.1688685}},
    abstract = {As a well known {NP-hard} problem, {SAT} problem is widely discussed by computer science society. In this paper, two common algorithms for {SAT} problems are implemented based on graphic hardware. They are greedy local search and genetic algorithm. After a brief description of the basic algorithm, we give our modification of the algorithm for fitting with graphic hardware's {SIMD} model. Then some implementation details and tricks for graphic hardware are given. At last testing result are given. We compared the performance of graphic hardware based computation with {CPU} based computation. The result shows a performance improvement on graphic hardware over ordinary {CPU.}},
    booktitle = {Evolutionary Computation, 2006. {CEC} 2006. {IEEE} Congress on},
    author = {Zhongwen Luo and Hongzhi Liu},
    year = {2006},
    keywords = {{3-SAT} problem, cellular genetic algorithms, computability, computational complexity, computer graphic equipment, convergence, genetic algorithms, graphic hardware {SIMD} model, greedy algorithms, greedy local search, mathematics computing, {NP-hard} problem, parallel algorithms, search problems},
    pages = {2988--2992}
    },
    
@article{fok_evolutionary_2007,
    title = {Evolutionary Computing on Consumer Graphics Hardware},
    volume = {22},
    issn = {1541-1672},
    doi = {{10.1109/MIS.2007.28}},
    abstract = {We propose implementing a parallel {EA} on consumer graphics cards, which we can find in many {PCs.} This lets more people use our parallel algorithm to solve large-scale, real-world problems such as data mining. Parallel evolutionary algorithms run on consumer-grade graphics hardware achieve better execution times than ordinary evolutionary algorithms and offer greater accessibility than those run on high-performance computers},
    number = {2},
    journal = {Intelligent Systems, {IEEE}},
    author = {{Ka-Ling} Fok and {Tien-Tsin} Wong},
    year = {2007},
    keywords = {computer graphic equipment, computer graphics, consumer graphics card, consumer-grade graphics hardware, evolutionary algorithms, evolutionary computation, evolutionary computing, high-performance computer, parallel algorithm, parallel algorithms, parallel evolutionary algorithm, pervasive computing, scientific computing on graphics-processing units, ubiquitous computing},
    pages = {69--78}
    },
    
@inproceedings{harding_fast_2007,
    title = {Fast Genetic Programming and Artificial Developmental Systems on {GPUs}},
    isbn = {0-7695-2813-9},
    url = {http://portal.acm.org/citation.cfm?id=1256286},
    abstract = {In this paper we demonstrate the use of the Graphics Processing Unit {(GPU)} to accelerate Evolutionary Computation applications, in particular Genetic Programming approaches. We show that it is possible to get speed increases of several hundred times over a typical {CPU} implementation, catapulting {GPU} processing for these applications into the realm of {HPC.} This increase in performance also extends to artificial developmental systems, where evolved programs are used to construct cellular systems. Feasibility of this approach to efficiently evaluate artificial developmental systems based on cellular automata is demonstrated.},
    booktitle = {Proceedings of the 21st International Symposium on High Performance Computing Systems and Applications},
    publisher = {{IEEE} Computer Society},
    author = {Simon Harding and Wolfgang Banzhaf},
    year = {2007},
    pages = {2}
    },
    
@incollection{harding_fast_2007-1,
    title = {Fast Genetic Programming on {GPUs}},
    url = {http://dx.doi.org/10.1007/978-3-540-71605-1_9},
    abstract = {As is typical in evolutionary algorithms, fitness evaluation in {GP} takes the majority of the computational effort. In this paper we demonstrate the use of the Graphics Processing Unit {(GPU)} to accelerate the evaluation of individuals. We show that for both binary and floating point based data types, it is possible to get speed increases of several hundred times over a typical {CPU} implementation. This allows for evaluation of many thousands of fitness cases, and hence should enable more ambitious solutions to be evolved using {GP.}},
    booktitle = {Genetic Programming},
    author = {Simon Harding and Wolfgang Banzhaf},
    year = {2007},
    pages = {90--101}
    },
    
@article{poli_genetic_2007,
    title = {Genetic programming an introductory tutorial and a survey of techniques and applications},
    url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.126.3889},
    author = {Riccardo Poli and William B Langdon and Nicholas F Mcphee and John R Koza},
    year = {2007}
    },
    
@article{harding_genetic_2008,
    title = {Genetic programming on {GPUs} for image processing},
    volume = {1},
    url = {http://www.inderscience.com/search/index.php?action=record&rec_id=24207&prevQuery=&ps=10&m=or},
    doi = {{10.1504/IJHPSA.2008.024207}},
    abstract = {The evolution of image filters using genetic programming is a relatively unexplored task. This is most likely due to the high computational cost of evaluating the evolved programs. The parallel processors available on modern graphics cards can be used to greatly increase the speed of evaluation. Previous papers in this area dealt with tasks such as noise reduction and edge detection. Here we demonstrate that other more complicated processes can also be successfully evolved and that we can 'reverse engineer' the output from filters used in common graphics manipulation programs.},
    number = {4},
    journal = {International Journal of High Performance Systems Architecture},
    author = {S. Harding and W. Banzhaf},
    year = {2008},
    keywords = {genetic programming, {GPU,} graphics processing units, image filters, image processing, parallel processing, reverse engineering},
    pages = {231 -- 240}
    },
    
@article{langdon_gpspmd_2008,
    title = {{GP} on {SPMD} parallel graphics hardware for mega Bioinformatics data mining},
    volume = {12},
    url = {http://dx.doi.org/10.1007/s00500-008-0296-x},
    doi = {10.1007/s00500-008-0296-x},
    abstract = {Abstract We demonstrate a {SIMD} C++ genetic programming system on a single 128 node parallel {nVidia} {GeForce} 8800 {GTX} {GPU} under {RapidMind’s}
    {GPGPU} Linux software by predicting ten year+ outcome of breast cancer from a dataset containing a million inputs. {NCBI} {GEO}
    {GSE3494} contains hundreds of Affymetrix {HG-U133A} and {HG-U133B} {GeneChip} biopsies. Multiple {GP} runs each with a population of
    5 million programs winnow useful variables from the chaff at more than 500 million {GPops} per second. Sources available via
    {FTP.}},
    number = {12},
    journal = {Soft Computing - A Fusion of Foundations, Methodologies and Applications},
    author = {W. Langdon and A. Harrison},
    month = oct,
    year = {2008},
    pages = {1169--1183}
    },
    
@incollection{gobron_gpu_2008,
    title = {{GPU} Accelerated Computation and Visualization of Hexagonal Cellular Automata},
    url = {http://dx.doi.org/10.1007/978-3-540-79992-4_67},
    abstract = {We propose a graphics processor unit {(GPU)-accelerated} method for real-time computing and rendering cellular automata {(CA)}
    that is applied to hexagonal {grids.Based} on our previous work [9] –which introduced first and second dimensional cases– this
    paper presents a model for hexagonal grid algorithms. Proposed method is novel and it encodes and transmits large {CA} key-codes
    to the graphics card and consequently, this technique allows to visualize the {CA} information flow in real-time to easily identify
    emerging behaviors even for large data sets. To show the efficiency of our model we first present a set of characteristic
    hexagonal behaviors, and then describe computational statistics for central processing unit {(CPU)} and {GPU} on a set of different
    hardware and operating system {(OS)} configurations. We show that our model is flexible and very efficient as it permits to
    compute {CA} close to a thousand times faster than classical {CPU} methods. Additionally, free access is provided to our downloadable
    software for hexagonal grid {CA} simulations.},
    booktitle = {Cellular Automata},
    author = {Stephane Gobron and Herva Bonafos and Daniel Mestre},
    year = {2008},
    pages = {512--521}
    },
    
@incollection{vasicek_hardware_2008,
    title = {Hardware Accelerators for Cartesian Genetic Programming},
    url = {http://dx.doi.org/10.1007/978-3-540-78671-9_20},
    abstract = {A new class of {FPGA-based} accelerators is presented for Cartesian Genetic Programming {(CGP).} The accelerators contain a genetic
    engine which is reused in all applications. Candidate programs (circuits) are evaluated using application-specific virtual
    reconfigurable circuit {(VRC)} and fitness unit. Two types of {VRCs} are proposed. The first one is devoted for symbolic regression
    problems over the fixed point representation. The second one is designed for evolution of logic circuits. In both cases a
    significant speedup of evolution (30–40 times) was obtained in comparison with a highly optimized software implementation
    of {CGP.} This speedup can be increased by creating multiple fitness units.},
    booktitle = {Genetic Programming},
    author = {Zdenek Vasicek and Lukas Sekanina},
    year = {2008},
    pages = {230--241}
    },
    
@inproceedings{robilliard_high_2009,
    address = {Barcelona, Spain},
    title = {High performance genetic programming on {GPU}},
    isbn = {978-1-60558-584-0},
    url = {http://portal.acm.org/citation.cfm?id=1555284.1555299},
    doi = {10.1145/1555284.1555299},
    abstract = {The availability of low cost powerful parallel graphics cards has stimulated the port of Genetic Programming {(GP)} on Graphics Processing Units {(GPUs).} Our work focuses on the possibilities offered by Nvidia G80 {GPUs} when programmed in the {CUDA} language. We compare two parallelization schemes that evaluate several {GP} programs in parallel. We show that the fine grain distribution of computations over the elementary processors greatly impacts performances. We also present memory and representation optimizations that further enhance computation speed, up to 2.8 billion {GP} operations per second. The code has been developed with the well known {ECJ} library.},
    booktitle = {Proceedings of the 2009 workshop on Bio-inspired algorithms for distributed systems},
    publisher = {{ACM}},
    author = {Denis Robilliard and Virginie Marion and Cyril Fonlupt},
    year = {2009},
    keywords = {genetic algorithms, genetic programming, graphics processing units, parallel processing},
    pages = {85--94}
    },
    
@incollection{wong_implementation_2009,
    title = {Implementation of Parallel Genetic Algorithms on Graphics Processing Units},
    url = {http://dx.doi.org/10.1007/978-3-540-95978-6_14},
    abstract = {In this paper, we propose to parallelize a Hybrid Genetic Algorithm {(HGA)} on Graphics Processing Units {(GPUs)} which are available
    and installed on ubiquitous personal computers. {HGA} extends the classical genetic algorithm by incorporating the Cauchy mutation
    operator from evolutionary programming. In our parallel {HGA,} all steps except the random number generation procedure are performed
    in {GPU} and thus our parallel {HGA} can be executed effectively and efficiently. We suggest and develop the novel pseudo-deterministic
    selection method which is comparable to the traditional global selection approach with significant execution time performance
    {advantages.We} perform experiments to compare our parallel {HGA} with our previous parallel {FEP} {(Fast} Evolutionary programming)
    and demonstrate that the former is much more effective and efficient than the latter. The parallel and sequential implementations
    of {HGA} are compared in a number of experiments, it is observed that the former outperforms the latter significantly. The effectiveness
    and efficiency of the pseudo-deterministic selection method is also studied.},
    booktitle = {Intelligent and Evolutionary Systems},
    author = {Man Wong and Tien Wong},
    year = {2009},
    pages = {197--216}
    },
    
@inproceedings{wilson_linear_2008,
    title = {Linear genetic programming {GPGPU} on Microsoft's Xbox 360},
    doi = {{10.1109/CEC.2008.4630825}},
    abstract = {We describe how to harness the graphics processing abilities of a consumer video game console {(Xbox} 360) for general programming on graphics processing unit {(GPGPU)} purposes. In particular, we implement a linear {GP} {(LGP)} system to solve classification and regression problems. We conduct inter- and intra-platform benchmarking of the Xbox 360 and {PC,} using {GPU} and {CPU} implementations on both architectures. Platform benchmarking confirms highly integrated {CPU} and {GPU} programming flexibility of the Xbox 360, having the potential to alleviate typical {GPGPU} decisions of allocating particular functionalities to {CPU} or {GPU.}},
    booktitle = {Evolutionary Computation, 2008. {CEC} 2008. {(IEEE} World Congress on Computational Intelligence). {IEEE} Congress on},
    author = {G. Wilson and W. Banzhaf},
    year = {2008},
    keywords = {classification problems, computer graphic equipment, computer graphics, consumer video game console, genetic algorithms, graphics processing abilities, graphics processing unit, linear genetic programming, linear programming, Microsoft Xbox 360, regression analysis, regression problems},
    pages = {378--385}
    },
    
@incollection{wong_parallel_2006,
    title = {Parallel Evolutionary Algorithms on {Consumer-Level} Graphics Processing Unit},
    url = {http://dx.doi.org/10.1007/3-540-32839-4_7},
    abstract = {Evolutionary Algorithms {(EAs)} are effective and robust methods for solving many practical problems such as feature selection,
    electrical circuits synthesis, and data mining. However, they may execute for a long time for some difficult problems, because
    several fitness evaluations must be performed. A promising approach to overcome this limitation is to parallelize these algorithms.
    In this chapter, we propose to implement a parallel {EA} on consumer-level Graphics Processing Unit {(GPU).} We perform experiments to compare our parallel {EA} with an ordinary {EA} and demonstrate that the former is much more
    effective than the latter. Since consumer-level graphics processing units are already widely available and installed on oridinary
    personal computers and they are easy to use and manage, more people will be able to use our parallel algorithm to solve their
    problems encountered in real-world applications.},
    booktitle = {Parallel Evolutionary Computations},
    author = {{Tien-Tsin} Wong and Man Wong},
    year = {2006},
    pages = {133--155}
    },
    
@inproceedings{man-leung_wong_parallel_2005,
    title = {Parallel evolutionary algorithms on graphics processing unit},
    volume = {3},
    doi = {{10.1109/CEC.2005.1554979}},
    abstract = {Evolutionary algorithms {(EAs)} are effective and robust methods for solving many practical problems such as feature selection, electrical circuit synthesis, and data mining. However, they may execute for a long time for some difficult problems, because several fitness evaluations must be performed. A promising approach to overcome this limitation is to parallelize these algorithms. In this paper, we propose to implement a parallel {EA} on consumer-level graphics cards. We perform experiments to compare our parallel {EA} with an ordinary {EA} and demonstrate that the former is much more effective than the latter. Since consumer-level graphics cards are available in ubiquitous personal computers and these computers are easy to use and manage, more people are able to use our parallel algorithm to solve their problems encountered in real-world applications.},
    booktitle = {Evolutionary Computation, 2005. The 2005 {IEEE} Congress on},
    author = {{Man-Leung} Wong and {Tien-Tsin} Wong and {Ka-Ling} Fok},
    year = {2005},
    keywords = {computer graphic equipment, evolutionary computation, fitness evaluations, graphics cards, graphics processing unit, parallel algorithm, parallel algorithms, parallel evolutionary algorithms, ubiquitous computing, ubiquitous personal computers},
    pages = {2286--2293 Vol. 3}
    },
    
@incollection{yu_parallel_2005,
    title = {Parallel Genetic Algorithms on Programmable Graphics Hardware},
    url = {http://dx.doi.org/10.1007/11539902_134},
    abstract = {Parallel genetic algorithms are usually implemented on parallel machines or distributed systems. This paper describes how fine-grained parallel genetic algorithms can be mapped to programmable graphics hardware found in commodity {PC.} Our approach stores chromosomes and their fitness values in texture memory on graphics card. Both fitness evaluation and genetic operations are implemented entirely with fragment programs executed on graphics processing unit in parallel. We demonstrate the effectiveness of our approach by comparing it with compatible software implementation. The presented approach allows us benefit from the advantages of parallel genetic algorithms on low-cost platform.},
    booktitle = {Advances in Natural Computation},
    author = {Qizhi Yu and Chongcheng Chen and Zhigeng Pan},
    year = {2005},
    pages = {1051--1059}
    },
    
@inproceedings{man-leung_wong_parallel_2006,
    title = {Parallel Hybrid Genetic Algorithms on {Consumer-Level} Graphics Hardware},
    doi = {{10.1109/CEC.2006.1688683}},
    abstract = {In this paper, we report a parallel hybrid genetic algorithm {(HGA)} on consumer-level graphics cards. {HGA} extends the classical genetic algorithm by incorporating the Cauchy mutation operator from evolutionary programming. In our parallel {HGA,} all steps except the random number generation procedure are performed in graphics processing unit {(GPU)} and thus our parallel {HGA} can be executed effectively and efficiently. We propose the pseudo-deterministic selection method which is comparable to the traditional global selection approach with significant execution time performance advantages. We perform experiments to compare our parallel {HGA} with our previous parallel {FEP} (fast evolutionary programming) and demonstrate that the former is much more effective and efficient than the latter. The parallel and sequential implementations of {HGA} are compared in a number of experiments, it is observed that the former outperforms the latter significantly. The effectiveness and efficiency of the pseudo-deterministic selection method is also studied.},
    booktitle = {Evolutionary Computation, 2006. {CEC} 2006. {IEEE} Congress on},
    author = {{Man-Leung} Wong and {Tien-Tsin} Wong},
    year = {2006},
    keywords = {Cauchy mutation operator, computer graphic equipment, consumer-level graphics hardware, evolutionary programming, genetic algorithms, graphics processing unit, parallel algorithms, parallel hybrid genetic algorithm, pseudo-deterministic selection method, rendering (computer graphics)},
    pages = {2973--2980}
    },
    
@incollection{robilliard_population_2008,
    title = {Population Parallel {GP} on the G80 {GPU}},
    url = {http://dx.doi.org/10.1007/978-3-540-78671-9_9},
    abstract = {The availability of low cost powerful parallel graphics cards has stimulated a trend to port {GP} on Graphics Processing Units
    {(GPUs).} Previous works on {GPUs} have shown evaluation phase speedups for large training cases sets. Using the {CUDA} language
    on the G80 {GPU,} we show it is possible to efficiently interpret several {GP} programs in parallel, thus obtaining speedups also
    for small training sets starting at less than 100 training cases. Our scheme was embedded in the well-known {ECJ} library, providing
    an easy entry point for owners of G80 {GPUs.}},
    booktitle = {Genetic Programming},
    author = {Denis Robilliard and Virginie {Marion-Poty} and Cyril Fonlupt},
    year = {2008},
    pages = {98--109}
    }
    
    
@inproceedings{DBLP:conf/gecco/Zhu09,
    author = {Weihang Zhu},
    title = {A study of parallel evolution strategy: pattern search on
    a GPU computing platform},
    booktitle = {GEC Summit},
    year = {2009},
    pages = {765-772},
    ee = {http://doi.acm.org/10.1145/1543834.1543939},
    editor = {Lihong Xu and
    Erik D. Goodman and
    Guoliang Chen and
    Darrell Whitley and
    Yongsheng Ding},
    title = {Genetic and Evolutionary Computation Conference, GEC Summit
    2009, Proceedings, Shanghai, China, June 12-14, 2009},
    booktitle = {GEC Summit},
    publisher = {ACM},
    year = {2009},
    isbn = {978-1-60558-326-6},
    bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
@inproceedings{1570355,
    author = {Tsutsui, Shigeyoshi and Fujimoto, Noriyuki},
    title = {Solving quadratic assignment problems by genetic algorithms with GPU computation: a case study},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2523--2530},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570355},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570356,
    author = {Wilson, Garnett and Banzhaf, Wolfgang},
    title = {Deployment of CPU and GPU-based genetic programming on heterogeneous devices},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2531--2538},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570356},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570419,
    author = {Banzhaf, Wolfgang and Harding, Simon},
    title = {Accelerating evolutionary computation with graphics processing units},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {3237--3286},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570419},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570352,
    author = {Cavanagh, Joseph M. and Potok, Thomas E. and Cui, Xiaohui},
    title = {Parallel latent semantic analysis using a graphics processing unit},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2505--2510},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570352},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570353,
    author = {Langdon, W. B.},
    title = {A fast high quality pseudo random number generator for nVidia CUDA},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2511--2514},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570353},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570354,
    author = {Wong, Man Leung},
    title = {Parallel multi-objective evolutionary algorithms on graphics processing units},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2515--2522},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570354},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570350,
    author = {Perez-Miguel, Carlos and Miguel-Alonso, Jose and Mendiburu, Alexander},
    title = {Evaluating the cell broadband engine as a platform to run estimation of distribution algorithms},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2491--2498},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570350},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@inproceedings{1570351,
    author = {Rieffel, John and Saunders, Frank and Nadimpalli, Shilpa and Zhou, Harvey and Hassoun, Soha and Rife, Jason and Trimmer, Barry},
    title = {Evolving soft robotic locomotion in PhysX},
    booktitle = {GECCO '09: Proceedings of the 11th annual conference companion on Genetic and evolutionary computation conference},
    year = {2009},
    isbn = {978-1-60558-505-5},
    pages = {2499--2504},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1570256.1570351},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
    
@TechReport{langdon:2009:TR-09-05,
    author = "W. B. Langdon",
    title = "A {CUDA} {SIMT} Interpreter for Genetic~Programming",
    institution = "Department of Computer Science, King's College London",
    year = "2009",
    number = "TR-09-05",
    address = "Strand, WC2R 2LS, UK",
    month = "18 " # jun,
    note = "Revised",
    keywords = "genetic algorithms, genetic programming, GPU, GPGPU,
    Tesla, sub-machine code GP, CUDA",
    URL = "http://www.dcs.kcl.ac.uk/technical-reports/papers/TR-09-05.pdf",
    URL2 = "http://www.gpgpgpu.com/gecco2009/5.pdf",
    abstract = "A Single Instruction Multiple Thread CUDA interpreter
    provides SIMD like parallel evaluation of the whole GP
    population of quarter of a million RPN expressions on
    graphics cards and nVidia Tesla T10P. Using sub-machine
    code GP a sustain peak performance of 212 billion GP
    operations per second (3300 speed up) and an average of
    4.5 peta GP ops per day is reported for a single card
    on a Boolean induction benchmark never attempted
    before, let alone solved.",
    size = "2 pages",
    }
    
@inproceedings{1543939,
    author = {Zhu, Weihang},
    title = {A study of parallel evolution strategy: pattern search on a GPU computing platform},
    booktitle = {GEC '09: Proceedings of the first ACM/SIGEVO Summit on Genetic and Evolutionary Computation},
    year = {2009},
    isbn = {978-1-60558-326-6},
    pages = {765--772},
    location = {Shanghai, China},
    doi = {http://doi.acm.org/10.1145/1543834.1543939},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@incollection{zhu_multi-walk_2009,
    title = {Multi-walk Parallel Pattern Search Approach on a {GPU} Computing Platform},
    url = {http://dx.doi.org/10.1007/978-3-642-01970-8_99},
    abstract = {This paper studies the efficiency of using Pattern Search {(PS)} on bound constrained optimization functions on a Graphics Processing
    Unit {(GPU)} computing platform. Pattern Search is a direct search optimization technique that does not require derivative information
    on non-linear programming problems. Pattern Search is ideally suited to a {GPU} computing environment due to its low memory
    requirement and no communication between threads in a multi-walk setting. To adapt to a {GPU} environment, traditional Pattern
    Search is modified by terminating based on iterations instead of tolerance. This research designed and implemented a multi-walk
    Pattern Search algorithm on a {GPU} computing platform. Computational results are promising with a computing speedup of 100+
    compared to a corresponding implementation on a single {CPU.}},
    booktitle = {Computational Science – {ICCS} 2009},
    author = {Weihang Zhu and James Curry},
    year = {2009},
    pages = {984--993}
    }
    
    
@inproceedings{1570089,
    author = {Maitre, Ogier and Baumes, Laurent A. and Lachiche, Nicolas and Corma, Avelino and Collet,Pierre},
    title = {Coarse grain parallelization of evolutionary algorithms on {GPGPU} cards with {EASEA}},
    booktitle = {GECCO '09: Proceedings of the 11th Annual conference on Genetic and evolutionary computation},
    year = {2009},
    isbn = {978-1-60558-325-9},
    pages = {1403--1410},
    location = {Montreal, Qu\'{e}bec, Canada},
    doi = {http://doi.acm.org/10.1145/1569901.1570089},
    url = {http://portal.acm.org/citation.cfm?id=1570089},
    publisher = {ACM},
    address = {New York, NY, USA},
    }
    
@INPROCEEDINGS{Diaz2009,
    author = {Josefa Díaz and Francisco Fernández de Vega and J. Ignacio Hidalgo
    and Oscar Garnica and Sonia López},
    title = {Applying Genetic Algorithms to Resizable Caches Configuration for
    Improving SMT Performance},
    booktitle = {WPABA'09: Proceedings of the Second International Workshop on Parallel
    Architectures and Bioinspired Algorithms (WPABA 2009)},
    year = {2009},
    editor = {José L. Risco-Martín and Oscar Garnica},
    pages = {39-48},
    address = {Raleigh, NC, USA},
    month = {September 12-16},
    publisher = {Universidad Complutense de Madrid},
    owner = {jlrisco},
    timestamp = {2009.09.30}
    }
    
@INPROCEEDINGS{Harding2009,
    author = {Simon Harding and Wolfgang Banzhaf},
    title = {Distributed Genetic Programming on GPUs using CUDA},
    booktitle = {WPABA'09: Proceedings of the Second International Workshop on Parallel
    Architectures and Bioinspired Algorithms (WPABA 2009)},
    year = {2009},
    editor = {José L. Risco-Martín and Oscar Garnica},
    pages = {1-10},
    address = {Raleigh, NC, USA},
    month = {September 12-16},
    publisher = {Universidad Complutense de Madrid},
    owner = {jlrisco},
    timestamp = {2009.09.30},
    URL = "http://www.evolutioninmaterio.com/preprints/CudaParallelCompilePP.pdf",
    abstract = "Using of a cluster of Graphics Processing Unit (GPU)
    equipped computers, it is possible to accelerate the
    evaluation of individuals in Genetic Programming.
    Program compilation, fitness case data and fitness
    execution are spread over the cluster of computers,
    allowing for the efficient processing of very large
    datasets. Here, the implementation is demonstrated on
    datasets containing over 10 million rows and several
    hundred megabytes in size.
    
    Populations of candidate individuals are compiled into
    NVidia CUDA programs and executed on a set of client
    computers - each with a different subset of the
    dataset.
    
    The paper discusses the implementation of the system
    and acts as a tutorial for other researchers
    experimenting with genetic programming and GPUs.",
    
    }
    
@INPROCEEDINGS{Napoli2009,
    author = {Claudia Di Napoli and Maurizio Giordano and Zsolt Németh and Nicola
    Tonellotto},
    title = {A Chemical Metaphor to Model Service Selection for Composition of
    Services},
    booktitle = {WPABA'09: Proceedings of the Second International Workshop on Parallel
    Architectures and Bioinspired Algorithms (WPABA 2009)},
    year = {2009},
    editor = {José L. Risco-Martín and Oscar Garnica},
    pages = {11-20},
    address = {Raleigh, NC, USA},
    month = {September 12-16},
    publisher = {Universidad Complutense de Madrid},
    owner = {jlrisco},
    timestamp = {2009.09.30}
    }
    
@INPROCEEDINGS{PerezMiguel2009,
    author = {Carlos Pérez-Miguel and José Miguel-Alonso and Alexander Mendiburu},
    title = {Porting Estimation of Distribution Algorithms to the Cell Broadband
    Engine},
    booktitle = {WPABA'09: Proceedings of the Second International Workshop on Parallel
    Architectures and Bioinspired Algorithms (WPABA 2009)},
    year = {2009},
    editor = {José L. Risco-Martín and Oscar Garnica},
    pages = {31-38},
    address = {Raleigh, NC, USA},
    month = {September 12-16},
    publisher = {Universidad Complutense de Madrid},
    owner = {jlrisco},
    timestamp = {2009.09.30}
    }
    
@INPROCEEDINGS{RiscoMartin2009,
    author = {José L. Risco-Martín and José M. Colmenar and Rubén Gonzalo},
    title = {A Parallel Evolutionary Algorithm to Optimize Dynamic Memory Managers
    in Embedded Systems},
    booktitle = {WPABA'09: Proceedings of the Second International Workshop on Parallel
    Architectures and Bioinspired Algorithms (WPABA 2009)},
    year = {2009},
    editor = {José L. Risco-Martín and Oscar Garnica},
    pages = {21-30},
    address = {Raleigh, NC, USA},
    month = {September 12-16},
    publisher = {Universidad Complutense de Madrid},
    owner = {jlrisco},
    timestamp = {2009.09.30}
    }
    
@InProceedings{DBLP:conf/gecco/LewisM09,
    author = "Tony E. Lewis and George D. Magoulas",
    title = "Strategies to minimise the total run time of cyclic
    graph based genetic programming with {GPU}s",
    booktitle = "GECCO '09: Proceedings of the 11th Annual conference
    on Genetic and evolutionary computation",
    year = "2009",
    editor = "Guenther Raidl and Franz Rothlauf and
    Giovanni Squillero and Rolf Drechsler and others",
    pages = "1379--1386",
    address = "Montreal",
    publisher = "ACM",
    publisher_address = "New York, NY, USA",
    month = "8-12 " # jul,
    organisation = "SigEvo",
    keywords = "genetic algorithms, genetic programming, cyclic
    cartesian genetic programming, GPU, deme, parallel
    interpretter",
    isbn13 = "978-1-60558-325-9",
    bibsource = "DBLP, http://dblp.uni-trier.de",
    URL = "http://doi.acm.org/10.1145/1569901.1570086",
    size = "8 pages",
    
    }
    
@inproceedings{DBLP:conf/ausai/XuCKLV09,
    author = {Yanyan Xu and Hui Chen and Reinhard Klette and Jiaju Liu and Tobi Vaudrey},
    title = {Belief Propagation Implementation Using CUDA on an NVIDIA GTX 280},
    booktitle = {Australasian Conference on Artificial Intelligence},
    year = {2009},
    pages = {180-189},
    URL = {http://dx.doi.org/10.1007/978-3-642-10439-8_19},
    bibsource = {DBLP, http://dblp.uni-trier.de},
    publisher = {Springer},
    series = {Lecture Notes in Computer Science},
    editor = {Ann E. Nicholson and Xiaodong Li}
    }
    
    
@InCollection{langdon:2009:pdci,
    author = "W. B. Langdon",
    title = "Large Scale Bioinformatics Data Mining with Parallel
    Genetic Programming on Graphics Processing Units",
    booktitle = "Parallel and Distributed Computational Intelligence",
    publisher = "Springer",
    year = "2010",
    editor = "Francisco Fernandez {de Vega} and Erick Cantu-Paz",
    volume = "279",
    series = "Studies in Computational Intelligence",
    chapter = "5",
    pages = "113--141",
    month = jan,
    keywords = "genetic algorithms, genetic programming, GPU",
    isbn13 = "978-3642106743",
    URL = "http://www.springer.com/engineering/book/978-3-642-10674-3",
    doi = "doi:10.1007/978-3-642-10675-0_6",
    abstract = "A suitable single instruction multiple data GP
    interpreter can achieve high (Giga GPop/second)
    performance on a SIMD GPU graphics card by
    simultaneously running multiple diverse members of the
    genetic programming population. SPMD dataflow
    parallelisation is achieved because the single
    interpreter treats the different GP programs as data.
    On a single 128 node parallel nVidia GeForce 8800 GTX
    GPU, the interpreter can out run a compiled approach,
    where data parallelisation comes only by running a
    single program at a time across multiple inputs.
    
    The RapidMind GPGPU Linux C++ system has been
    demonstrated by predicting ten year+ outcome of breast
    cancer from a dataset containing a million inputs. NCBI
    GEO GSE3494 contains hundreds of Affymetrix
    \mbox{HG-U133A} and HG-U133B GeneChip biopsies.
    Multiple GP runs each with a population of five million
    programs winnow useful variables from the chaff at more
    than 500 million GPops per second. Sources available
    via
    \href{http://www.cs.ucl.ac.uk/staff/W.Langdon/ftp/gp-code/gpu_gp_2.tar.gz}
    {FTP}.",
    size = "28 pages",
    }
@InProceedings{langdon:2010:eurogp,
    author = "W. B. Langdon",
    title = "A Many Threaded {CUDA} Interpreter for Genetic
    Programming",
    booktitle = "EuroGP 2010",
    year = "2010",
    editor = "Anna I Esparcia-Alcazar and Aniko Ekart and
    Sara Silva",
    address = "Istanbul",
    month = "7-9 " # apr,
    organisation = "EvoStar",
    keywords = "genetic algorithms, genetic programming, GPU, GPGPU,
    Tesla, sub-machine code GP, Reverse Polish Notation",
    URL = "http://www.cs.ucl.ac.uk/staff/W.Langdon/ftp/papers/langdon_2010_eurogp.pdf",
    URL = "http://www.cs.ucl.ac.uk/staff/W.Langdon/ftp/papers/langdon_2010_eurogp.ps.gz",
    size = "12 pages",
    abstract = "A Single Instruction Multiple Thread CUDA interpreter
    provides SIMD like parallel evaluation of the whole GP
    population of 0.25 million trees reverse polish
    notation (RPN) expressions on graphics cards and nVidia
    Tesla T10P. Using sub-machine code GP a sustain peak
    performance of 215 billion GP operations per second
    (3400 speed up) and an average of 10 peta GP ops per
    day is reported for a single GPU card on a Boolean
    induction benchmark never attempted before, let alone
    solved.",
    }
    
@Article{Robilliard:2009:GPEM,
    author = "Denis Robilliard and Virginie Marion-Poty and
    Cyril Fonlupt",
    title = "Genetic programming on graphics processing units",
    journal = "Genetic Programming and Evolvable Machines",
    year = "2009",
    volume = "10",
    number = "4",
    pages = "447--471",
    month = dec,
    note = "Special issue on parallel and distributed evolutionary
    algorithms, part I",
    keywords = "genetic algorithms, genetic programming, Graphics
    processing units, GPU, Parallel processing",
    ISSN = "1389-2576",
    URL = "http://www-lil.univ-littoral.fr/~robillia/Publis/GPonGPU09.ps.gz",
    doi = "doi:10.1007/s10710-009-9092-3",
    size = "25 pages",
    abstract = "The availability of low cost powerful parallel
    graphics cards has stimulated the port of Genetic
    Programming (GP) on Graphics Processing Units (GPUs).
    Our work focuses on the possibilities offered by Nvidia
    G80 GPUs when programmed in the CUDA language. In a
    first work we have showed that this setup allows to
    develop fine grain parallelization schemes to evaluate
    several GP programs in parallel, while obtaining
    speedups for usual training sets and program sizes.
    Here we present another parallelization scheme and
    optimizations about program representation and use of
    GPU fast memory. This increases the computation speed
    about three times faster, up to 4 billion GP operations
    per second. The code has been developed within the well
    known ECJ library and is open source.",
    notes = "ECJ JNI Java Native Interface to CUDA. RPN. Thread
    divergence. nVidia 8800 GTX. Sextic symbolic
    regression, 6-mux and 11-multiplexor, intertwined
    spirals, Mackey-Glass \cite{langdon:2008:eurogp}. Data
    cache not faster???
    
    Code via
    http://www-lil.univ-littoral.fr/~robillia/GPUregression.html",
    }
    
@inproceedings{DBLP:conf/evoW/LuongMT10,
    author = {Th{\'e} Van Luong and
    Nouredine Melab and
    El-Ghazali Talbi},
    title = {Local Search Algorithms on Graphics Processing Units. A
    Case Study: The Permutation Perceptron Problem},
    booktitle = {EvoCOP},
    year = {2010},
    pages = {264-275},
    ee = {http://dx.doi.org/10.1007/978-3-642-12139-5_23},
    crossref = {DBLP:conf/evoW/2010cop},
    bibsource = {DBLP, http://dblp.uni-trier.de}
    }
@proceedings{DBLP:conf/evoW/2010cop,
    editor = {Peter I. Cowling and
    Peter Merz},
    title = {Evolutionary Computation in Combinatorial Optimization,
    10th European Conference, EvoCOP 2010, Istanbul, Turkey,
    April 7-9, 2010. Proceedings},
    booktitle = {EvoCOP},
    publisher = {Springer},
    series = {Lecture Notes in Computer Science},
    volume = {6022},
    year = {2010},
    isbn = {978-3-642-12138-8},
    ee = {http://dx.doi.org/10.1007/978-3-642-12139-5},
    bibsource = {DBLP, http://dblp.uni-trier.de}
    }
    
@inproceedings{Tsutsui:2011:GECCO,
    author = {Shigeyoshi Tsutsui and Noriyuki Fujimoto},
    title = {ACO with tabu search on a GPU for solving QAPs using move-cost adjusted thread assignment},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0557-0},
    pages = {1547--1554},
    keywords = {Parallel evolutionary systems},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001576.2001785},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {This paper proposes a parallel ant colony optimization (ACO) for solving quadratic assignment problems (QAPs) on a graphics processing unit (GPU) by combining tabu (TS) with ACO in CUDA (ompute unified device architecture). In TS for QAP, all neighbor moves are tested. These moves form two groups based on computing of move cost. In one group, the computing of cost is O(1) and in the other group, the computing of move cost is O(n). We compute these two groups of moves in parallel by assigning the computations to threads of CUDA. In this assignment, we propose an efficient method which we call Move-Cost Adjusted Thread Assignment (MATA). The results with GPU computation with MATA show a promising speedup compared to computation with the CPU. It is also shown that MATA is effective in applying 2-opt local search.},
    notes = {Also known as \cite{2001785}
    GECCO-2011
    A joint meeting of the twentieth international conference on
    genetic algorithms (ICGA-2011) and the sixteenth annual genetic
    programming conference (GP-2011)},
    }
    
@inproceedings{Mussi:2011:GECCO,
    author = {Luca Mussi and Youssef S.G. Nashed and Stefano Cagnoni},
    title = {GPU-based asynchronous particle swarm optimization},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0557-0},
    pages = {1555--1562},
    keywords = {Parallel evolutionary systems},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001576.2001786},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {This paper describes our latest implementation of Particle Swarm Optimization (PSO) with simple ring topology for modern Graphic Processing Units (GPUs). To achieve both the fastest execution time and the best performance, we designed a parallel version of the algorithm, as fine-grained as possible, without introducing explicit synchronization mechanisms among the particles' evolution processes. The results we obtained show a significant speed-up with respect to both the sequential version of the algorithm run on an up-to-date CPU and our previously developed parallel implementation within the nVIDIA CUDA architecture.},
    notes = {Also known as \cite{2001786}
    GECCO-2011
    A joint meeting of the twentieth international conference on
    genetic algorithms (ICGA-2011) and the sixteenth annual genetic
    programming conference (GP-2011)},
    }
    
@inproceedings{Solomon:2011:GECCO,
    author = {Steven Solomon and Parimala Thulasiraman and Ruppa Thulasiram},
    title = {Collaborative multi-swarm PSO for task matching using graphics processing units},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0557-0},
    pages = {1563--1570},
    keywords = {Parallel evolutionary systems},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001576.2001787},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {We investigate the performance of a highly parallel Particle Swarm Optimization (PSO) algorithm implemented on the GPU. In order to achieve this high degree of parallelism we implement a collaborative multi-swarm PSO algorithm on the GPU which relies on the use of many swarms rather than just one. We choose to apply our PSO algorithm against a real-world application: the task matching problem in a heterogeneous distributed computing environment. Due to the potential for large problem sizes with high dimensionality, the task matching problem proves to be very thorough in testing the GPUs capabilities for handling PSO. Our results show that the GPU offers a high degree of performance and achieves a maximum of 37 times speedup over a sequential implementation when the problem size in terms of tasks is large and many swarms are used.},
    notes = {Also known as \cite{2001787}
    GECCO-2011
    A joint meeting of the twentieth international conference on
    genetic algorithms (ICGA-2011) and the sixteenth annual genetic
    programming conference (GP-2011)},
    }
    
    
@inproceedings{Kromer:2011:GECCO,
    author = {Pavel Kr\"{o}mer and V\'{a}clav Sn\aa\v{s}el and Jan Plato\v{s} and Ajith Abraham},
    title = {Many-threaded implementation of differential evolution for the CUDA platform},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0557-0},
    pages = {1595--1602},
    keywords = {Parallel evolutionary systems},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001576.2001791},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {Differential evolution is an efficient populational meta -- heuristic optimization algorithm successful in solving difficult real world problems. Due to the simplicity of its operations and data structures, it is suitable for a parallel implementation on multicore systems and on the GPU. In this paper, we design a simple yet highly parallel implementation of the differential evolution using the CUDA architecture. We demonstrate the speedup obtained by the proposed parallelization of the differential evolution on an NP hard combinatorial optimization problem and on a benchmark function of many variables.},
    notes = {Also known as \cite{2001791}
    GECCO-2011
    A joint meeting of the twentieth international conference on
    genetic algorithms (ICGA-2011) and the sixteenth annual genetic
    programming conference (GP-2011)},
    }
    
@inproceedings{Simonsen:2011:GECCO,
    author = {Martin Simonsen and Christian N.S. Pedersen and Mikael H. Christensen and Ren\'{e} Thomsen},
    title = {GPU-accelerated high-accuracy molecular docking using guided differential evolution: real world applications},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0557-0},
    pages = {1803--1810},
    keywords = {Real world applications},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001576.2001818},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {The objective in molecular docking is to determine the best binding mode of two molecules in silico. A common application of molecular docking is in drug discovery where a large number of ligands are docked against a protein to identify potential drug candidates. This is a computationally intensive problem especially if flexibility of the molecules are taken into account. In this paper we show how MolDock, which is a high accuracy method for flexible molecular docking using a variant of differential evolution, can be parallelised on both CPU and GPU. The methods presented for parallelising the workload result in an average speedup of 3.9x on a 4-core CPU and 27.4x on a comparable CUDA enabled GPU when docking 133 ligands of different sizes. Furthermore, the presented parallelisation schemes are generally applicable and can easily be adapted to other common flexible docking methods.},
    notes = {Also known as \cite{2001818}
    GECCO-2011
    A joint meeting of the twentieth international conference on
    genetic algorithms (ICGA-2011) and the sixteenth annual genetic
    programming conference (GP-2011)},
    }
    
@inproceedings{Camarg:2011:GECCOcomp,
    author = {amargo Bare },
    {n}o, Carlos Iv\'{a}n and Pedraza Bonilla, Cesar Augusto and Ni\,
    {n}o, Luis Fernado and Martinez Torre, Jos\'{e} Ignacio},
    title = {Intrinsic evolvable hardware for combinatorial synthesis based on SoC+FPGA and GPU platforms},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference companion on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0690-4},
    keywords = {genetic algorithms, genetic programming, GPU: Poster},
    pages = {189--190},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2001964},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {This paper presents a novel a parallel genetic programming (PGP) Boolean synthesis implementation on a low cost cluster of an embedded open platform called SIE. Some tasks of the PGP have been accelerated through a hardware coprocessor called FCU, that allows to evaluate individuals onchip as intrinsic evolution. Results have been compared with GPU and HPC implementations, resulting in speedup values up to approximately 2 and 180 respectively.},
    notes = {Also known as \cite{2001964}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
@inproceedings{Cupertino:2011:GECCOcomp,
    author = {Leandro F. Cupertino and Cleomar P. Silva and Douglas M. Dias and Marco Aur\'{e}lio C. Pacheco and Cristiana Bentes},
    title = {Evolving CUDA PTX programs by quantum inspired linear genetic programming},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {genetic algorithms, genetic programming, GPU},
    pages = {399--406},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002026},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {The tremendous computing power of Graphics Processing Units (GPUs) can be used to accelerate the evolution process in Genetic Programming (GP). The automatic generation of code using the GPU usually follows two different approaches: compiling each evolved or interpreting multiple programs. Both approaches, however, have performance drawbacks. In this work, we propose a novel approach where the GPU pseudo-assembly language, PTX (Parallel Thread Execution), is evolved. Evolving PTX programs is faster, since the compilation of a PTX program takes orders of magnitude less time than a CUDA program compilation on the CPU, and no interpreter is necessary. Another important aspect of our approach is that the evolution of PTX programs follows the Quantum Inspired Linear Genetic Programming (QILGP). Our approach, called QILGP3U (QILGP + GPGPU), enables the evolution on a single machine in a reasonable time, enhances the quality of the model with the use of PTX, and for big databases can be much faster than the CPU implementation.},
    notes = {Also known as \cite{2002026}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
@inproceedings{Sato:2011:GECCOcomp,
    author = {Yuji Sato and Naohiro Hasegawa and Mikiko Sato},
    title = {Acceleration of genetic algorithms for sudoku solution on many-core processors},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {},
    pages = {407--414},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002027},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {In this paper, we use the problem of solving Sudoku puzzles to demonstrate the possibility of achieving practical processing time through the use of many-core processors for parallel processing in the application of genetic computation. To increase accuracy, we propose a genetic operation that takes building-block linkage into account. As a parallel processing model for higher performance, we use a multiple-population coarse-grained GA model to counter initial value dependence under the condition of a limited number of individuals. The genetic manipulation is also accelerated by the parallel processing of threads. In an evaluation using even very difficult problems, we show that execution times of several tens of seconds and several seconds can be obtained by parallel processing with the Intel Corei7 and NVIDIA GTX460, respectively, and that a correct solution rate of 100percent can be achieved in either case.},
    notes = {Also known as \cite{2002027}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
@inproceedings{Langdon:2011:GECCOcomp,
    author = {William B. Langdon},
    title = {Debugging CUDA},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {},
    pages = {415--422},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002028},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {During six months of intensive nVidia CUDA C programming many bugs were created. We pass on the software engineering lessons learnt, particularly those relevant to parallel general-purpose computation on graphics hardware GPGPU.},
    notes = {Also known as \cite{2002028}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
    
    
@inproceedings{Langdon:2011:GECCOcomp,
    author = {William B. Langdon},
    title = {Performing with CUDA},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {},
    pages = {423--430},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002029},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {Recently a GPGPU application had to be redesigned to overcome performance problems. A number of software engineering lessons were learnt from this and other projects. We describe those about obtaining high performance from nVidia GPUs and practical aspects of CUDA C software development.},
    notes = {Also known as \cite{2002029}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
    
    
@inproceedings{Pospichal:2011:GECCOcomp,
    author = {Petr Pospichal and Eoin Murphy and Michael O'Neill and Josef Schwarz and Jiri Jaros},
    title = {Acceleration of grammatical evolution using graphics processing units: computational intelligence on consumer games and graphics hardware},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {genetic algorithms, genetic programming, grammatical evolution, GPU},
    pages = {431--438},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002030},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {Several papers show that symbolic regression is suitable for data analysis and prediction in financial markets. Grammatical Evolution (GE), a grammar-based form of Genetic Programming (GP), has been successfully applied in solving various tasks including symbolic regression. However, often the computational effort to calculate the fitness of a solution in GP can limit the area of possible application and/or the extent of experimentation undertaken. This paper deals with using mainstream graphics processing units (GPU) for acceleration of GE solving symbolic regression. GPU optimisation details are discussed and the NVCC compiler is analysed. We design an effective mapping of the algorithm to the CUDA framework, and in so doing must tackle constraints of the GPU approach, such as the PCI-express bottleneck and main memory transactions.
    
    This is the first occasion GE has been adapted for running on a GPU. We measure our implementation running on one core of CPU Core i7 and GPU GTX 480 together with a GE library written in JAVA, GEVA.
    
    Results indicate that our algorithm offers the same convergence, and it is suitable for a larger number of regression points where GPU is able to reach speedups of up to 39 times faster when compared to GEVA on a serial CPU code written in C. In conclusion, properly used, GPU can offer an interesting performance boost for GE tackling symbolic regression.},
    notes = {Also known as \cite{2002030}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
    
    
@inproceedings{Pedemonte:2011:GECCOcomp,
    author = {Mart\'{\i}n Pedemonte and Enrique Alba and Francisco Luna},
    title = {Bitwise operations for GPU implementation of genetic algorithms},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {},
    pages = {439--446},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002031},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {Research on the implementation of evolutionary algorithms in graphics processing units (GPUs) has grown in recent years since it significantly reduces the execution time of the algorithm. A relevant aspect, which has received little attention in the literature, is the impact of the memory space occupied by the population in the performance of the algorithm, due to limited capacity of several memory spaces in the GPUs. In this paper we analyse the differences in performance of a binary Genetic Algorithm implemented on a GPU using a Boolean data type or packing multiple bits into a non Boolean data type. Our study considers the influence on the performance of single point and double point crossover for solving the classical One-Max problem. The results obtained show that packing bits for storing binary strings can reduce the execution time up to 50percent.},
    notes = {Also known as \cite{2002031}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
    
    
@inproceedings{Lewis:2011:GECCOcomp,
    author = {Tony E. Lewis and George D. Magoulas},
    title = {Identifying similarities in TMBL programs with alignment to quicken their compilation for GPUs: computational intelligence on consumer games and graphics hardware},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {genetic algorithms, genetic programming, GPU},
    pages = {447--454},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002032},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {The most impressive accelerations of Genetic Programming (GP) using the Graphics Processing Unit (GPU) have been achieved by dynamically compiling new GPU code for each batch of individuals to be evaluated. This approach suffers an overhead in compilation time.
    
    We aim to reduce this penalty by pre-processing the individuals to identify and draw out their similarities, hence reducing duplication in compilation work. We use this approach with Tweaking Mutation Behaviour Learning (TMBL), a form focused on long term fitness growth. For individuals of 300 instructions, the technique is found to reduce compilation time 4.817 times whilst only reducing evaluation speed by 3.656percent.},
    notes = {Also known as \cite{2002032}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
    
    
@inproceedings{Lewis:2011:GECCOcomp,
    author = {Tony E. Lewis and George D. Magoulas},
    title = {TMBL kernels for CUDA GPUs compile faster using PTX: computational intelligence on consumer games and graphics hardware},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {genetic algorithms, genetic programming, GPU},
    pages = {455--462},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002033},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {Many of the most effective attempts to harness the power of the Graphics Processing Unit (GPU) to accelerate Genetic Programming (GP) have dynamically compiled code for individuals as they are to be evaluated. This approach executes very quickly on the GPU but is slow to compile, hence only vast data-sets fully reap its rewards.
    
    To reduce compilation time, we generate and compile code in the lower-level language PTX. We investigate this in the context of implementing Tweaking Mutation Behaviour Learning (TMBL) on the GPU. We find that for programs of 300 instructions, using PTX reduces the compile time 5.861 times and even increases the evaluation speed by 23.029percent.},
    notes = {Also known as \cite{2002033}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
    
    
@inproceedings{Harding:2011:GECCOcomp,
    author = {Simon Harding and Wolfgang Banzhaf},
    title = {Implementing cartesian genetic programming classifiers on graphics processing units using GPU.NET},
    booktitle = {GECCO 2011 Computational intelligence on consumer games and graphics hardware (CIGPU)},
    year = {2011},
    editor = {Simon Harding and W. B. Langdon and Man Leung Wong and Garnett Wilson and Tony Lewis},
    isbn = {978-1-4503-0690-4},
    keywords = {genetic algorithms, genetic programming, cartesian genetic programming, GPU},
    pages = {463--470},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2002034},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {This paper investigates the use of a new Graphics Processing Unit (GPU) programming tool called 'GPU.NET' for implementing a Genetic Programming fitness evaluator. We find that the tool is able to help write software that accelerates fitness evaluation. For the first time, Cartesian Genetic Programming (CGP) was used with a GPU-based interpreter. With its code reuse and compact representation, implementing CGP efficiently on the GPU required several innovations. Further, we tested the system on a very large data set, and showed that CGP is also suitable for use as a classifier.},
    notes = {Also known as \cite{2002034}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }
    
@inproceedings{Loiacono:2011:GECCOcomp,
    author = {Daniele Loiacono},
    title = {Fast prediction computation in learning classifier systems using CUDA},
    booktitle = {GECCO '11: Proceedings of the 13th annual conference companion on Genetic and evolutionary computation},
    year = {2011},
    editor = {Natalio Krasnogor and Pier Luca Lanzi and Andries Engelbrecht and David Pelta and Carlos Gershenson and Giovanni Squillero and Alex Freitas and Marylyn Ritchie and Mike Preuss and Christian Gagne and Yew Soon Ong and Guenther Raidl and Marcus Gallager and Jose Lozano and Carlos Coello-Coello and Dario Landa Silva and Nikolaus Hansen and Silja Meyer-Nieberg and Jim Smith and Gus Eiben and Ester Bernado-Mansilla and Will Browne and Lee Spector and Tina Yu and Jeff Clune and Greg Hornby and Man-Leung Wong and Pierre Collet and Steve Gustafson and Jean-Paul Watson and Moshe Sipper and Simon Poulding and Gabriela Ochoa and Marc Schoenauer and Carsten Witt and Anne Auger},
    isbn = {978-1-4503-0690-4},
    keywords = {Genetics based machine learning: Poster},
    pages = {169--170},
    month = {12-16 July},
    organisation = {SIGEVO},
    address = {Dublin, Ireland},
    doi = {doi:10.1145/2001858.2001953},
    publisher = {ACM},
    publisher_address = {New York, NY, USA},
    abstract = {Computing the system prediction is one of the most important and computationally expensive tasks in Learning Classifier Systems. In this paper, we provide a parallel solution to the problem of computing the prediction array in XCS using the NVIDIA's Compute Unified Device Architecture (CUDA). We performed several experiments to test our parallel solution using two different types of GPUs and to study how performances are affected by (i) the problem size, (ii) the number of problem actions, and (iii) the number of classifiers in the population. Our experimental results show a speedup that ranges from slightly less than 2X up to 32X.},
    notes = {Also known as \cite{2001953}
    Distributed on CD-ROM at GECCO-2011.
    
    ACM Order Number 910112.},
    }