% Conference paper: "Att-Sinkhorn" (ICAC 2023, IEEE).
% The citation key is kept unchanged so any existing citations still resolve.
% Proper nouns/acronyms in title and booktitle are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
% The former `series` field only repeated the conference name (already given
% in `booktitle`) and has been dropped; `note` now carries just the
% conference dates.
% NOTE(review): `address` holds a country rather than a publisher city;
% left as exported -- confirm the intended value and replace.
@inproceedings{8b8ce6172985460e8901c2b2ee419393,
  author    = {Ma, Qianxia and Zhang, Ming and Tang, Yan and Huang, Zhen},
  title     = {{Att-Sinkhorn}: Multimodal Alignment with {Sinkhorn}-based Deep Attention Architecture},
  booktitle = {{ICAC} 2023 -- 28th International Conference on Automation and Computing},
  publisher = {IEEE},
  address   = {United States},
  year      = {2023},
  month     = oct,
  day       = {16},
  doi       = {10.1109/icac57885.2023.10275301},
  isbn      = {979-8-3503-3586-6},
  language  = {English},
  keywords  = {Multimodal machine learning, Sinkhorn algorithm, attention mechanism, modality alignment, visual captioning},
  abstract  = {Multimodal alignment aims to establish a matching relationship between multimodal features, connecting parts of different modalities that contain the same or similar semantics. To increase the accuracy of the alignment of multimodal features, we propose a modality alignment method based on the Sinkhorn metric and attention mechanism - Att-Sinkhorn algorithm. The algorithm converts the alignment and matching problem between different modal features into the discrete Monge problem in the optimal transport, which compares the distance between the probability distributions corresponding to different modalities directly. In order to get practical solutions, the algorithm obtains the approximate solution of the original discrete Monge problem by introducing entropy regularization to perform Kantorovich relaxation. The transformed alignment problem can be considered as a matrix scaling problem based on the principle of mass conservation, and the Sinkhorn algorithm is used for iterative solutions. To verify the effectiveness of the Att-Sinkhorn algorithm, we adopt experiments on a typical task in multimodal alignment, image captioning, which requires mapping between textual and visual information. Empirical results and analysis indicate the effectiveness of the Att-Sinkhorn algorithm for multimodal alignment.},
  note      = {Conference held 30 August -- 1 September 2023},
}