% Susuri, Hamiti, Dika (2016): journal article on class imbalance in
% machine-learning-based vandalism detection in Wikipedia.
% The page range uses the ASCII "--" form; "Wikipedia" is brace-protected
% so that sentence-casing styles keep its capital.
@article{Susuri2016,
  author    = {Susuri, Arsim and Hamiti, Mentor and Dika, Agni},
  title     = {The Class Imbalance Problem in the Machine Learning Based Detection of Vandalism in {Wikipedia} across Languages},
  journal   = {Advances in Science, Technology and Engineering Systems Journal},
  year      = {2016},
  volume    = {2},
  number    = {1},
  pages     = {16--22},
  doi       = {10.25046/aj020103},
  url       = {https://www.astesj.com/v02/i01/p03/},
  language  = {en},
  publisher = {ASTES Publishers},
  abstract  = {This paper analyses the impact of current trend in applying machine learning in detection of vandalism, with the specific aim of analyzing the impact of the class imbalance in Wikipedia articles. The class imbalance problem has the effect that almost all the examples are labelled as one class (legitimate editing); while far fewer examples are labelled as the other class, usually the more important class (vandalism). The obtained results show that resampling strategies: Random Under Sampling (RUS) and Synthetic Minority Oversampling TEchnique (SMOTE) have a partial effect on the improvement of the classification performance of all tested classifiers, excluding Random Forest, on both tested languages (simple English and Albanian) of the Wikipedia. The results from experimentation extended on two different languages show that they are comparable to the existing work.},
  keywords  = {Machine learning, Wikipedia, Vandalism, Class imbalance},
}