% Hou, Shin and Xiu (2021), CSIAM Trans. Appl. Math. 2(1), 81--107.
% Cleaned publisher export: authors separated with "and", bare DOI, HTML markup stripped from the abstract.
@Article{CSIAM-AM-2-81,
  author   = {Hou, Jun and Shin, Yeonjong and Xiu, Dongbin},
  title    = {Identification of Corrupted Data via {$k$}-Means Clustering for Function Approximation},
  journal  = {CSIAM Transactions on Applied Mathematics},
  year     = {2021},
  volume   = {2},
  number   = {1},
  pages    = {81--107},
  abstract = {In addition to measurement noises, real world data are often corrupted by
    unexpected internal or external errors. Corruption errors can be much larger than
    the standard noises and negatively affect data processing results. In this paper, we
    propose a method of identifying corrupted data in the context of function approximation.
    The method is a two-step procedure consisting of approximation stage and
    identification stage. In the approximation stage, we conduct straightforward function
    approximation to the entire data set for preliminary processing. In the identification
    stage, a clustering algorithm is applied to the processed data to identify the
    potentially corrupted data entries. In particular, we found $k$-means clustering algorithm to
    be highly effective. Our theoretical analysis reveals that under sufficient conditions the
    proposed method can exactly identify all corrupted data entries. Numerous examples
    are provided to verify our theoretical findings and demonstrate the effectiveness of the
    method.},
  issn     = {2708-0579},
  doi      = {10.4208/csiam-am.2020-0212},
  url      = {http://global-sci.org/intro/article_detail/csiam-am/18655.html}
}