% Journal article (Genes, vol. 13, no. 5, May 2022) on a HiFi + Hi-C genome assembly of striped catfish.
% NOTE(review): pages (article number) is derived from the DOI suffix; confirm against the publisher record.
@article{592bf994613a429e80c8abe5946bbf74,
  author    = {Hai, Dao Minh and Yen, Duong Thuy and Liem, Pham Thanh and Tam, Bui Minh and Huong, Do Thi Thanh and Hang, Bui Thi Bich and Hieu, Dang Quang and Garigliany, Mutien Marie and Coppieters, Wouter and Kestemont, Patrick and Phuong, Nguyen Thanh and Farnir, Fr{\'e}d{\'e}ric},
  title     = {A High-Quality Genome Assembly of Striped Catfish ({Pangasianodon} hypophthalmus) Based on Highly Accurate Long-Read {HiFi} Sequencing Data},
  journal   = {Genes},
  year      = {2022},
  month     = may,
  volume    = {13},
  number    = {5},
  pages     = {923},
  doi       = {10.3390/genes13050923},
  issn      = {2073-4425},
  publisher = {MDPI AG},
  language  = {English},
  keywords  = {chromosome-scale genome assembly, HiFi reads, selective breeding, striped catfish},
  abstract  = {The HiFi sequencing technology yields highly accurate long-read data with accuracies greater than 99.9\% that can be used to improve results for complex applications such as genome assembly. Our study presents a high-quality chromosome-scale genome assembly of striped catfish (Pangasianodon hypophthalmus), a commercially important species cultured mainly in Vietnam, integrating HiFi reads and Hi-C data. A 788.4 Mb genome containing 381 scaffolds with an N50 length of 21.8 Mb has been obtained from HiFi reads. These scaffolds have been further ordered and clustered into 30 chromosome groups, ranging from 1.4 to 57.6 Mb, based on Hi-C data. The present updated assembly has a contig N50 of 14.7 Mb, representing a 245-fold and 4.2-fold improvement over the previous Illumina and Illumina-Nanopore-Hi-C based version, respectively. In addition, the proportion of repeat elements and BUSCO genes identified in our genome is remarkably higher than in the two previously released striped catfish genomes. These results highlight the power of using HiFi reads to assemble the highly repetitive regions and to improve the quality of genome assembly. The updated, high-quality genome assembled in this work will provide a valuable genomic resource for future population genetics, conservation biology and selective breeding studies of striped catfish.},
  note      = {Funding Information: This work was part of PANGAGEN project, supported by ARES-CCD (Acad{\'e}mie de Recherche et d{\textquoteright}Enseignement Sup{\'e}rieur---Commission de la Coop{\'e}ration au D{\'e}veloppement) and funded by the Belgian Development Cooperation (DRP/TPS 2017). We thank the staff of Genomics Platform, GIGA, University of Liege, the Norwegian Sequencing Centre, University of Oslo, Norway for sequencing DNA samples, and the Consortium des {\'E}quipements de Calcul Intensif (C{\'E}CI) for providing computational resources. Publisher Copyright: {\textcopyright} 2022 by the authors. Licensee MDPI, Basel, Switzerland.},
}