% Journal article: Innovative Biosystems and Bioengineering, vol. 2, no. 2 (June 2018).
% The abstract is stored as plain text with LaTeX math; HTML markup from the publisher export was removed.
@article{Umanets_Voinyk_Pavlov_Nastenko_2018,
  author       = {Umanets, Vitalii and Voinyk, Bogdan and Pavlov, Volodymyr and Nastenko, Ievgen},
  title        = {Estimation of Algorithms Efficiency in the Task of Biological Objects Clustering},
  journal      = {Innovative Biosystems and Bioengineering},
  year         = {2018},
  month        = jun,
  volume       = {2},
  number       = {2},
  pages        = {84--89},
  doi          = {10.20535/ibb.2018.2.2.133466},
  url          = {http://ibb.kpi.ua/article/view/133466},
  abstractNote = {Background. The task of determining the functional relationship between biophysical parameters is an integral part of the actual problem of finding the optimal impact on a biological object and is currently not completely resolved. One of the important tasks in this area is the partitioning of the original feature space into such areas (clusters) that relate to different functional relationships linking biophysical parameters and have, in general, an arbitrary shape. Such clusters in the future is logical to call functional. To obtain and analyze the functional clusters, there are a number of algorithms, each of which has its advantages and disadvantages. At the same time, the solution of a certain practical problem requires an evaluation of the efficiency of the algorithms in terms of the cluster separation adequacy.
    Objective. In this paper, for a general example of the biological objects clustering problem (Fisher's Iris Data Set), the efficiency of a typical clustering tools series is evaluated. The application of k-means classical algorithm, the Ward algorithm and developed in this work the fuzzy version of clustering for the k-means algorithm with a limited mass of the working area for the clusters' formation was considered.
    Methods. The algorithm includes a procedure for a priori estimation of the clusters quantity. The estimation is carried out according to the frequency histogram. To determine the optimal number of the histogram columns, the application of the Scott formula is justified. The algorithm allows forming clusters of arbitrary configuration with obtaining the value of the object's membership measure for each of the clusters. The comparative testing of the above algorithms was carried out on Fisher's Iris Data Set.
    Results. The best value of $F_1$-score is obtained for the algorithm proposed in this paper: $F_1 = 0.92$, the value $F_1 = 0.90$ is obtained for the Ward method and the value $F_1 = 0.88$ -- for the classical $k$-means algorithm.
    Conclusions. The obtained test results on the analysis problem of arbitrary-shaped clusters made it possible to give preference to the version of fuzzy $k$-means with a limited mass of the working area for the clusters' formation. The calculating of the membership measure value allows us to obtain additional information on the structure of cluster formations, as well as to correct the result of clustering of k-means with a limited mass, which is especially important since the formation of clusters occurs in a single pass. Comparing the computational resources required for computing algorithms with relatively close test results also makes it possible to give preference to the developed algorithm. Compared with the Ward algorithm, it requires fewer computing resources since no additional memory is needed to store the distance matrix and no time is required to recalculate it.},
}