-
Notifications
You must be signed in to change notification settings - Fork 0
/
ew.tex
1777 lines (1655 loc) · 119 KB
/
ew.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[
% reprint,
%preprint,
% 11pt,
% 10pt,
%superscriptaddress,
%groupedaddress,
%unsortedaddress,
%runinaddress,
% frontmatterverbose,
%preprintnumbers,
%nofootinbib,
%nobibnotes,
%bibnotes,
aps,
pra,
% linenumbers,
twocolumn,
% prl,
% prb,
% prd,
% rmp,
% prstab,
% prstper,
floatfix,
%longbibliography
]{revtex4-2}
% \usepackage{revquantum}
% new linux font, ignore mono
% \usepackage[mono=false]{libertine}
% \renewcommand{\baselinestretch}{1.05}
% \usepackage[top=0.7in,left=1in,bottom=1in,right=1in]{geometry}
\usepackage{amsmath,amsthm,amssymb,epsfig,graphicx,mathrsfs,amsfonts,dsfont,bbm}
% \usepackage{bbm} % for \mathbb{1}, but ruins the letter
% \usepackage{unicode-math}
% \DeclareMathOperator*{\argmax}{argmax}
% \DeclareMathOperator*{\argmin}{argmin}
\usepackage{pict2e}
\usepackage[percent]{overpic}
\usepackage{color}
\usepackage{listings}
% \usepackage{caption}
% \usepackage{subcaption}
\usepackage[caption=false]{subfig}
% \usepackage{fullpage}
\usepackage[toc,title,titletoc,header]{appendix}
\usepackage{color}
\usepackage{dcolumn}
\usepackage{bm}
\usepackage{hyperref}
\hypersetup{
citecolor=magenta,
colorlinks=true,
linkcolor=blue,
filecolor=green,
urlcolor=cyan,
}
\usepackage[capitalise]{cleveref}
\usepackage{enumitem}
\setlist[enumerate]{leftmargin=10mm, label=\alph*)}
\setlist[itemize]{leftmargin=12pt}
\usepackage{mathtools}
\usepackage{tikz}
%\usepackage{tikzit}
%\input{path_integral.tikzstyles}
\usepackage{braket}
\usepackage{physics}
% \usepackage{luatex85} % for qcircuit
\usepackage{luatex85,qcircuit}
\usepackage{blkarray}
\usepackage[linesnumbered,ruled,vlined,algosection]{algorithm2e}
\newcommand\mycommfont[1]{\footnotesize\ttfamily\textcolor{blue}{#1}}
\SetCommentSty{mycommfont}
% \setlength\parindent{0pt}
\setcounter{secnumdepth}{3}
\theoremstyle{plain}
\newtheorem{axiom}{Axiom}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}{Corollary}
\newtheorem{lemma}{Lemma}
\newtheorem{proposition}{Proposition}
\newtheorem{conjecture}{Conjecture}
\newtheorem{question}{Question}
\newtheorem{claim}{Claim}
\theoremstyle{definition}
\newtheorem{definition}{Definition}
\newtheorem*{problem*}{Problem}
\newtheorem{problem}{Problem}
\newtheorem{notation}{Notation}
\newtheorem{observation}{Observation}
\newtheorem{fact}{Fact}
\newtheorem{example}{Example}
\newtheorem{remark}{Remark}
\input{./macros.tex}
\newcommand{\kernel}{k}
\newcommand{\ew}{W}
\newcommand{\ob}{O}
\newcommand{\pob}{O}
\newcommand{\dm}{\rho}
% \newcommand{\ob}{\hat{O}}
% \newcommand{\ew}{\hat{W}}
\newcommand{\ghz}{\text{GHZ}}
\newcommand{\jsd}{\text{JS}}
\newcommand{\qjs}{\text{QJS}}
\newcommand{\kl}{\text{KL}}
\newcommand{\ml}{\text{ml}}
\newcommand{\bi}{\text{bi}}
\newcommand{\cs}{\text{cs}}
\newcommand{\svm}{\text{svm}}
\newcommand{\shadow}{\textup{shadow}}
\newcommand{\ansatz}{\textup{ansatz}}
\newcommand{\tomo}{\textup{tomo}}
\newcommand{\perm}{\textup{Perm}}
\newcommand{\target}{\textup{tar}}
\newcommand{\prepare}{\textup{pre}}
\newcommand{\entangled}{\textsc{entangled}}
\newcommand{\separable}{\textsc{separable}}
\newcommand{\noise}{\text{noise}}
\newcommand{\qnn}{\textup{QNN}}
\newcommand{\ntk}{\textup{NT}}
\newcommand{\ppt}{\textup{PPT}}
\newcommand{\bells}{\textup{BELL}}
\newcommand{\chsh}{\textup{CHSH}}
\newcommand{\bellineq}{\textup{Bell}}
\newcommand{\locc}{\textup{LOCC}}
% \newcommand{\ml}{\textup{ml}}
\newcommand{\stbz}{\hat{S}}
\newcommand{\separableset}{\mathcal{S}}
\newcommand{\si}{\hat{\sigma}_0}
\newcommand{\sx}{\hat{\sigma}_x}
\newcommand{\sy}{\hat{\sigma}_y}
\newcommand{\sz}{\hat{\sigma}_z}
\newcommand{\px}{X}
\newcommand{\py}{Y}
\newcommand{\pz}{Z}
\newcommand{\pI}{I}
\newcommand{\bmsigma}{\bm{\sigma}}
\renewcommand{\llaplacian}{\hat{\mathfrak{L}}}
%\newcommand{\zpartition}{\mathcal{Z}}
\newcommand{\hamiltonian}{\hat{H}}
\newcommand{\U}{U}
% \newcommand{\U}{\hat{U}}
\newcommand{\subgroup}{\mathbb{H}}
\newcommand{\ppartition}{\mathcal{P}}
\newcommand{\oracle}{\hat{O}}
\newcommand{\D}{\mathcal{D}}
\newcommand{\proj}{\hat{P}}
%\newcommand{\deltat}{\Delta t}
%\newcommand{\deltatau}{\Delta \tau}
\newcommand{\cz}{\textup{\textsc{cz}}}
\newcommand{\cx}{\textup{\textsc{cx}}}
\newcommand{\toffoli}{\textup{\textsc{toffoli}}}
\newcommand{\lleft}{\leftarrow}
\newcommand{\rright}{\rightarrow}
\newcommand{\intinf}{\int_{-\infty}^{\infty}}
\newcommand{\qi}[1]{\textcolor{blue}{(Qi: #1)}}
% disable subsections and subsubsections in the TOC
\makeatletter
%\def\l@subsection#1#2{}
\def\l@subsubsection#1#2{}
\makeatother
\begin{document}
%%%%%%%%%%%%%%%%%%%
\title{Towards efficient and generic entanglement detection by machine learning}
\author{Jue Xu}
\email{[email protected]}
\author{Qi Zhao}
\email{[email protected]}
%\affiliation{Department of Computer Science, University of Hong Kong.}
\affiliation{
QICI Quantum Information and Computation Initiative, Department of Computer Science,
The University of Hong Kong, Pokfulam Road, Hong Kong}
\date{\today}
%%%%%%%%%%%%%%%%%%%
\begin{abstract}
Detection of entanglement is an indispensable step to practical quantum computation and communication.
% Most of entanglement witnesses XXX, drawback. To address these problems, we use (machine learning + shadow) to noise and efficient.
Compared with the conventional entanglement witness method based on fidelity, we propose a flexible, machine learning assisted entanglement detection protocol that is robust to different types of noises and sample efficient.
In this protocol, an entanglement classifier for a generic entangled state is obtained by training a classical machine learning model with a synthetic dataset.
The dataset contains classical features of two types of states and their labels (either entangled or separable).
The classical features of a state, which are expectation values of a set of $k$-local Pauli observables, are estimated sample-efficiently by the classical shadow method.
In the numerical simulation, our classifier can detect the entanglement of 4-qubit GHZ states with coherent noise and W states mixed with large white noise, with high accuracy.
% And the features of states, the expectation values of a set of $k$-local Pauli observables are estimated efficiently in sample by the classical shadow method.
\end{abstract}
\maketitle
% \setcounter{tocdepth}{0}
% \tableofcontents
%%%%%%%%%%%%%%%Content%%%%%%%%%%%%%%%
\section{Introduction}
Entanglement \cite{horodeckiQuantumEntanglement2009} is the key ingredient of quantum teleportation \cite{bennettTeleportingUnknownQuantum1993}, quantum cryptography \cite{ekertQuantumCryptographyBased1991}, quantum computation \cite{briegelMeasurementbasedQuantumComputation2009}, and quantum metrology \cite{giovannettiQuantumenhancedMeasurementsBeating2004}.
% \qi{teleportation (B), QKD (E92), metrology, check references }
However, decoherence and imperfections are inevitable in real-world devices, which means the interaction between a quantum system and a classical environment would significantly affect entanglement quality and diminish quantum advantage in applications.
For practical purposes, it is essential to detect entanglement in certain quantum physical systems.
This problem has been widely studied \cite{guhneEntanglementDetection2009}, but still far from being perfectly solved.
Quantum tomography, as one of the most widely used certification methods, can provide the full density matrix of the prepared state. However, even given the tomography results, it is computationally intractable to determine whether the state is entangled by classical \cite{gurvitsClassicalDeterministicComplexity2003} or quantum computation \cite{gutoskiQuantumInteractiveProofs2015}.
Not alone, the sample complexity of quantum tomography
%to fully recover the density matrix of a state from experiments
grows exponentially with dimension \cite{haahSampleoptimalTomographyQuantum2017,odonnellEfficientQuantumTomography2016}.
Thus, a more realistic scenario is entanglement witnesses that can determine whether a prepared state is entangled or not with the prior knowledge of the state.
%assuming it is a known entangled state subject to realistic noises.
This task for many entangled states of practical interest can be efficiently solved by measuring a few observables \cite{bourennaneWitnessingMultipartiteEntanglement2004,tothDetectingGenuineMultipartite2005,tothEntanglementDetectionStabilizer2005}.
Though attempts such as \cite{guhneNonlinearEntanglementWitnesses2006,zhouEntanglementDetectionCoherent2020} have been made to enhance robustness to noise, entanglement witnesses will also fail when there is a lot of noise or unexpected types of noise in practice
%there is no generic, efficient solution to detect entanglement beyond witness
\cite{weilenmannEntanglementDetectionMeasuring2020}.
%\qi{fail, such as unfaithful, not generic, case-by-case}
%\qi{For proposed witness, general method to reduce LMS, esp}
Moreover, for given witnesses, it is also generally challenging to reduce the measurement efforts (sample complexity), especially for non-stabilizer states \cite{zhangEfficientEntanglementGeneration2021}.
The goal of this paper is to find an efficient and generic way to detect the entanglement of many-body quantum states.
Machine learning (ML) is a powerful tool for such a purpose.
Many ML techniques including both classical and quantum machine learning models have been proposed for classification tasks in physics, such as the classification of phases and prediction of ground states \cite{carrasquillaMachineLearningPhases2017,congQuantumConvolutionalNeural2019,huangProvablyEfficientMachine2022}.
Entanglement detection as a typical classification problem has been studied by ML techniques, such as determining separability by Neural Network (NN) \cite{luSeparabilityEntanglementClassifierMachine2018,maTransformingBellInequalities2018} and deriving generic entanglement witnesses by Support Vector Machine (SVM) \cite{zhuMachineLearningDerivedEntanglement2021,vintskevichClassificationFourqubitEntangled2022}.
% There are quantum machine learning algorithms for quantum problems (data)
Nevertheless, these prior machine learning assisted methods
only explore white noise robustness without considering other types of noises that happened in experiments.
And the sample efficiency of experimental implementation for these ML-derived classifiers has not been discussed.
% the implementation of the entanglement witnesses requires LMS
% provide an end-to-end protocol
% and do not address the problem how to efficiently extract classical features of quantum states in real experiments.
% Roughly, our solution is to make use of both machine learning techniques and some recently-developed quantum algorithms.
% Assume we would like to distinguish an entangled state incluing its `vicinity' (proximity) from undesired states (e.g., all separable states),
In this work, an ML classifier is obtained by training SVM with a synthetic dataset on a classical computer.
The dataset consists of two types of states, one is a set of certain target entangled states subject to randomly sampled noise, and the other is a set of randomly sampled separable states with the given partitions.
To increase the feasibility in experiments, each state is characterized by its expectation values of Pauli observables, called classical features.
% The classical features $\vbx$ and the label $y\in\qty{-1,1}$ of a state consist a datapoint in a dataset $\qty{(\vbx,y)}$.
% \qi{To be more specific, input data, classical machine learning. XXXX.}
% Efficient , shadow vs non shadow, better sample complexity
% our method derive such a classifier by fitting a synthetic dataset randomly sampled states from two target sets with their labels (either $\entangled$ or $\textsf{separable}$).
% Specifically, our protocol starts from evaluation of expectations of $n$-qubit Pauli observables of a set of states.
Within the framework of SVM, classification capability can be boosted by nonlinear kernel method and unimportant features can be eliminated programmatically.
%With the trained classifier at hand, it is expected that brand new samples from real experiments can be classified with high accuracy,
Furthermore, we restrict the Pauli observables to $k$-local such that classical features can be estimated with a smaller sample complexity via the classical shadow method \cite{huangPredictingManyProperties2020}.
In the numeric simulation of 4-qubit GHZ state and W state, the kernel SVM classifier exhibits better robustness to white noise than conventional fidelity witnesses and also robust to coherent noise which is more realistic in experiments but not widely studied.
% test, high accuracy.
% provide a generic and efficient solution to multipartite entanglement detection.
And the derandomized classical shadow method outperforms other schemes for estimating many $k$-local observables (features).
% \qi{Give a concrete result about the efficiency}
This paper is organized as follows: in \cref{sec:preliminaries}, we briefly present necessary definitions of multipartite entanglement, related entanglement detection problems, and mainstream methods for these problems;
\cref{sec:protocol} demonstrates our end-to-end protocol including two parts: learning an entanglement witness for a generic state from synthetic data and efficient estimation of classical features of states from experiments;
at last, numerical simulation results are discussed in \cref{sec:numerical_simulation}.
\section{Preliminaries}\label{sec:preliminaries}
% \subsection{Related works}
% \subsection{Notations}
% Notations:
% \begin{notation}
% The hats on the matrices such as
% % $\hat{A}$, Hamiltonian $\hamiltonian$,
% observables $\ob$, entanglement witness $\ew$,
% emphasize that they play the roles of operators (Hermitian matrices).
% density matrix $\dm$ (omitted)
% $\qty{I,X,Y,Z}$
% \end{notation}
% \begin{notation}
% In machine learning setting,
% denote vector (matrix) $\vbx$, $\vbw$, $\vb{K}$ by boldface font.
% % A simle (undirected, unweighted) graph $\graph=(V,E)$ is described by vertices $V$ and edges $E$.
% \end{notation}
% For specific purpose, we use different basis (representations) for quantum states.
% One is the computational basis $\qty{\ket{z}}$ with $z\in \qty[2^n]$ where $n$ is the number of qubits,
% while another useful one is the binary representation of computational basis $\qty{\ket{\vbx}\equiv\ket{x_1}\ket{x_2}\dots\ket{x_n}}$ with $x_j\in \qty{0,1}$.
% For simplicity, we let $N \equiv 2^n$ and $\ket{\vb{0}}\equiv\ket{0^n} \equiv\ket{0}^{\otimes n}$ if no ambiguity.
% \begin{notation}
% If no ambiguity,
% we omit the tensor products between subsystems and the hats on operators for readability,
% e.g.,
% % shorthand
% $\ket{\psi_A}\ket{\psi_B}\equiv \ket{\psi_A}\otimes \ket{\psi_B}$
% % Hadamard basis $\ket{+}: = (\ket{0}+\ket{1})/\sqrt{2} $.
% and $\px I \pz \equiv \hat{X} \otimes \identity \otimes \hat{Z}$.
% % and $\px^{(1)}\pz^{(3)} \equiv \hat{X} \otimes \identity \otimes \hat{Z}$.
% \end{notation}
% \begin{notation}
% Denote $\pob_{\sigma}\in \qty{I,X,Y,Z}^{\otimes n}$ for a Pauli observable.
% % Denote $\pob_{\sigma}:=\bigotimes \sigma$ for a Pauli observable.
% % where $\sigma\in \qty{I,X,Y,Z}^n$ is a string of Pauli operators.
% Denote $\vbx_{\dm,\bmsigma}:=(\Tr(\dm\pob_{\sigma_1}),\dots,\Tr(\dm\pob_{\sigma_M}))$ for expectations of $M$ Pauli observables with respect to the state $\dm$ where $\bmsigma\subseteq \qty{I,X,Y,Z}^n$.
% Denote vectors $\vbx$, $\bmsigma$, $\vbw$ by boldface font.
% \end{notation}
% \begin{notation}
% % $d$ for $d$-dimensional qudit;
% $D\equiv d^n$ as the dimension of a $n$-qudit systems.
% \end{notation}
\subsection{Multipartite entanglement}
% \subsection{Entanglement detection}
% \subsubsection{Bipartite entanglement}
Large-scale entanglement involving multiple particles may be the main resource for quantum advantages in quantum computation and communication.
Roughly, we say a quantum state $\dm$ of $n$ subsystems is \emph{entangled} if it is not fully separable,
i.e., the state cannot be written as the tensor product of all subsystems as $\dm=\rho_1\otimes\cdots\otimes\rho_n$.
% it can be written as a convex mixture of product states $\sum_j w_j \dm\otimes\dots\otimes\dm$
% \begin{definition}[fully separable]\label{def:fully_separable}
% An $n$-particle (qubit) pure state $\ket{\psi_f}$ is fully separable
% if it can be written as the tensor product of all subsystems $\qty{A_1,\dots,A_n}$, i.e., $\ket{\psi_f}=\bigotimes_{i=1}^n \ket{\phi_{A_i}}$.
% % Analoguous to \cref{eq:mixed},
% Analoguously, a mixed state $\dm_f$ is fully separable if it can be written as a convex combination of fully separable pure states.
% % (, and is (fully) $n$-separable if it is in $S_n$).
% % An $n$-qubit pure state $\ket{\psi_f}$ is $\ppartition$-fully separable $\iff$ it can be written as
% % $\ket{\psi_f}=\otimes_i^m \ket{\phi_{A_i}}$.
% % Analoguous to \cref{eq:mixed}, an $n$-qubit mixed state $\dm_f$ is $\ppartition$-fully separable $\iff$ it can be decomposed into a convex mixture of $\ppartition$-fully separable pure states.
% % % \begin{equation}
% % % \dm_f = \sum_i p_i \op{\psi_f^i}, (\forall i) ( p_i\ge 0, \sum_i p_i = 1) .
% % % \end{equation}
% % P-bi-separable... $\separableset_f^\ppartition \subset S_b^\ppartition$
% \end{definition}
Clearly, the simple statement `the state is entangled' would allow only two of the particles are entangled while the rest is in a product state, which is very weak entanglement.
So, the more interesting entanglement property is bipartite separability:
\begin{definition}[bi-separable]\label{def:bipartite_separable}
% Consider a system partitioned into two subsystems $\hilbertspace_A \otimes \hilbertspace_B$ of dimensions $d_A$ and $d_B$ respectively.
A pure state $\ket{\psi}$ is bipartite separable (bi-separable) if and only if it can be written as a tensor product form
% $\ket{\psi_b}=\ket{\phi_A}\otimes\ket{\phi_{\bar{A}}}$,
% $\ket{\psi}_{bi}=\ket{\phi^{(A)}}\otimes\ket{\phi}^{(B)}$,
$\ket{\psi}_{\bi}^{\ppartition} = \ket{\phi_{A}}\otimes\ket{\phi_{B}}$ with some bi-partition $\ppartition=\qty{A,B\equiv \bar{A}}$.
% where $\ppartition_2 = \qty{ A, B\equiv \bar{A} }$ is a bipartition of the qubits in the system.
% Otherwise, $\ket{\psi}$ is entangled.
A mixed state $\dm$ is bi-separable if and only if it can be written as a convex combination of pure bi-separable states, i.e.,
$\dm_{\bi}=\sum_i p_i \op{\psi_i}_\bi^{\ppartition_i}$
($\ppartition_i$ can be different partitions)
% \footnote{For different $i$, $\ppartition_i$ can be different partitions}
% \begin{equation}
% \dm_{\bi}=\sum_i p_i \op{\psi_i}_{bi}
% % \dm_{AB}= \sum_i \lambda_i \dm_{A,i} \otimes \dm_{B,i}
% ,\;
% \forall p_i > 0, \sum_i p_i = 1
% \label{eq:mixed}
% \end{equation}
with a probability distribution $\qty{p_i}$.
The set of all bi-separable states is denoted as $\separableset_\bi$.
% A state $\dm_{AB}$ is \emph{separable} if it can be writen as a convex combination $\dm_{AB}= \sum_i \lambda_i \dm_{A,i} \otimes \dm_{B,i}$ with a probability distribution $\lambda_i\ge 0$ and $\sum_i \lambda_i = 1$.
\end{definition}
% Note that the state $\ket{\phi}_A$ may be entangled, thus the state $\ket{\psi}$ is not necessarily \nameref{def:fully_separable}.
% \begin{definition}[genuine multipartite entanglement]\label{def:gme}
\begin{definition}[GME]\label{def:gme}
% \begin{definition}[genuine entangled]\label{def:genuinely_entangled}
On the contrary, if a state $\dm\notin \separableset_\bi$,
it possesses genuine multipartite entanglement (GME).
% A state possesses $\ppartition$-genuine entanglement if it is outside of $S_b^\ppartition$.
% A state $\dm$ possesses $\ppartition$-genuine entanglement iff $\dm\notin S_b^\ppartition$.
\end{definition}
% The size of the genuinely entangled quantum system becomes a figure of merit for assessing the advancement of quantum devices in the competition among various realizations.
% \subsubsection{Multipartite entanglement structures}
GME implies that all subsystems are indeed entangled with each other,
so it is the strongest form of entanglement.
Whereas, there is another restricted way for generalizing bi-separability to mixed states:
% In contrast to $S_{\bi}$, a state is $\ppartition_2$-separable,
if it is a mixing of pure bi-separable states with the same partition $\ppartition_2$,
and we denote the state set as $\separableset_{\bi}^{\ppartition_2}$.
It is practically interesting to study entanglement under the certain partition,
because it naturally indicates the quantum information processing capabilities among a real geometric configuration.
% Moreover, for some systems, such as distributed quantum computing, multiple quantum processor, and quantum network, natural partition exists due to the system geometric configuration.
We have a formal definition for entanglement concerning partitions:
\begin{definition}[full entanglement]\label{def:full_entanglement}
A state $\dm$ possesses full entanglement
if it is outside of the separable state set $\separableset_{\bi}^{\ppartition_2}$ for any partition,
that is, $\forall \ppartition_2 = \qty{A,\bar{A}},\dm \notin \separableset_\bi^{\ppartition_2}$.
% \begin{equation}
% \dm \notin S_b^{\ppartition_2},
% \forall \ppartition_2 = \qty{A,\bar{A}}
% \end{equation}
% is fully entangled if it is neither biseparable nor fully separable.
% \cite{guhneEntanglementDetection2009}
\end{definition}
For a state with full entanglement, it is possible to prepare it by mixing bi-separable states with different bipartitions,
so full entanglement is weaker than \nameref{def:gme} but still useful in practice.
% Since $S_b^{\ppartition_2}\subset S_\bi$, GME is a stronger claim than full entanglement.
% It is clear that $S_b^{\ppartition_2}\subset S_b$, and $S_b$ can be generated by the convex mixture of all possible $S_b^{\ppartition_2}$.
% We also remark that the recently demonstrated entanglement in the IBM cloud quantum computing \cite{wang16qubitIBMUniversal2018} is actually the full entanglement defined here.
% \begin{remark}
% P-... can be viewed as generalized versions of regular fully separable, biseparable, and genuinely entangled states, respectively.
% In fact, when $m=n$, these pairs of definitions are the same.
% By definitions, one can see that if a state is $P_m$-fully separable, it must be m-separable. Of course, an m-separable state might not be $P_m$-fully separable, for example, if the partition is not properly chosen.
% \end{remark}
% \subsubsection{entanglement measures}
% Rather than qualitatively determining (bi)separability, there are measures to quantify entanglement.
% \begin{definition}[concurrence]\label{def:concurrence}
% \end{definition}
% \begin{remark}
% the minimal eigenvalue (the absolution of this value for entangled states is named as negativity) , can be obtained analytically
% \begin{equation}
% \lambda_{\min}\qty(\dm_{\theta,\phi}^{\T_B}) =
% (1-p)/4 - p \cos(\theta/2) \sin(\theta/2)
% \end{equation}
% \cite{maTransformingBellInequalities2018}
% \end{remark}
% entanglement structure measures.
% By going through all possible partitions, one can investigate higher level entanglement structures, such as entanglement intactness (non-separability), which quantifies how many pieces in the $n$-partite state are separated.
% To benchmark our technological progress towards the generation of largescale genuine multipartite entanglement, it is thus essential to determine the corresponding entanglement depth.
% \begin{definition}[Entanglement intactness, depth]
% the entanglement intactness of a state $\dm$ to be $m$, if and only if $\dm\notin S_{m+1}$ and $\dm\in S_m$.
% When the entanglement intactness is 1, the state possesses \nameref{def:gme}; and when the intactness is $n$, the state is \nameref{def:fully_separable}.
% $k$-producible. $D$-dimensional (Schmidt rank) entangled
% \end{definition}
% \begin{example}[GHZ]\label{exm:ghz}
% bipartite: Bell states;
% nontrivial multipartite: tripartite.
% Greenberger-Horne-Zeilinger (GHZ) state: $\ket{\ghz}:=\frac{1}{\sqrt{2}}(\ket{0}^{\otimes n} + \ket{1}^{\otimes n} )$ (eight-photon) produce the five different entangled states (one from each entanglement structure/partition?):
% \begin{equation*}
% \ket{\ghz_8},\ket{\ghz_{62}},\ket{\ghz_{44}},\ket{\ghz_{422}},\ket{\ghz_{2222}}.
% \end{equation*}
% The GHZ-state is generally considered as the state with the genuine 3-partite entanglement, while the W-state has the peculiar property of having the maximal expected amount of two-partite entanglement if one party is traced out.
% % W state
% Schmidt rank, PPT criteria, entanglement witness...
% \end{example}
% \subsubsection{Unfaithful state}\label{sec:unfaithful_state}
% \subsubsection{Graph state}\label{sec:graph_state}
% \begin{remark}[??]
% The entanglement \nameref{def:entropy} $S( \dm_A )$ equals the rank of the adjacency matrix of the underlying bipartite graph, which can be efficiently calculated.
% For graph states, the reduced density matrices can be represented efficiently in terms of their stabilizer elements or their adjacency matrix.
% % \cite{heimQuantumClassicalAnnealing2015}
% \end{remark}
\subsection{Entanglement detection}
After introducing the definitions of entanglement,
the next basic question is how to determine the entanglement of a state efficiently.
Despite clear definitions, it is a highly non-trivial question for a general state.
For a general review on this subject, we refer readers to \cite{guhneEntanglementDetection2009}.
One of the most widely studied problems in this area is bi-separability.
\begin{problem}[separability]\label{prm:separability}
Given a density matrix
\footnote{
A quantum (mixed) state $\dm$ can be represented by a density matrix which is a Hermitian, positive semidefinite operator (matrix) of trace one. If the rank of $\dm$ is 1, then the state is a pure state.
}
$\dm$, to determine if it is \nameref{def:bipartite_separable} (in $\separableset_{\bi}$).
\end{problem}
% \subsubsection{Hardness of separability}
% \nameref{thm:ppt}
It is not hard to prove that if a state is bi-separable regarding $\ppartition=\qty{A,B}$, then it must have positive partial transpose (PPT),
i.e.,
the partially transposed (PT)
\footnote{
The partial transpose (PT) operation acting on subsystem $A$ is defined as
$\op{k_A,k_B}{l_A,l_B}^{\T_A} := \op{l_A,k_B}{k_A,l_B}$
where $\qty{\ket{k_A,k_B}}$ is a product basis of the joint system $\hilbertspace_{AB}$.
}
density matrix $\dm_{AB}^{\T_A}$ is positive, semidefinite \footnote{A matrix (operator) is positive, semidefinite (PSD) if all its eigenvalues are non-negative.} \cite{peresSeparabilityCriterionDensity1996,horodeckiSeparabilityMixedStates1996}.
By contrapositive, we have a sufficient condition for (bipartite) entanglement, that is
%\begin{theorem}
%\label{thm:ppt}
% The positive partial transpose (\ppt) criterion states that
if the smallest eigenvalue of partial transpose $\dm_{AB}^{\T_A}$ is negative (NPT), then the state is entangled (cannot be bi-separable with $\ppartition=\qty{A,B}$).
%\end{theorem}
We should mention that the PPT criterion is a necessary and sufficient condition for \nameref{prm:separability} only when the system dimension is low ($d_A d_B \le 6$ where $d_A$ and $d_B$ are the dimensions of two bipartite subsystems respectively) \cite{horodeckiSeparabilityMixedStates1996}.
Therefore, no general solution for the separability problem is known.
Then, a natural question is whether it is possible to solve separability approximately.
By relaxing the definition (promise a gap between two types of states), a reformulation of separability in the theoretic computer science language is
\begin{problem}[Weak membership problem for separability]\label{prm:weak_membership problem_for_separability}
Given a density matrix $\dm$ with the promise that either (i) $\dm\in \separableset_{\bi}$ or (ii) $\norm{\dm-\dm_{\bi}}\ge \epsilon$ with certain norm, decide which is the case.
\end{problem}
% Unfortunately, solving this problem classically is proved to be hard.
Unfortunately, even if we are given the complete information about a state and promised a gap (error tolerance $\epsilon$), it is still hard to determine separability approximately by classical computation.
% In order to apply the \nameref{thm:ppt}, the full density matrix must be available. However, tomography requires an exponential number of measurements.
\nameref{prm:weak_membership problem_for_separability}
% The weak membership problem for separability
is NP-Hard for $\epsilon=1/\poly(d_A,d_B)$ with respect to Euclidean norm and trace norm \footnote{
% Schatten p-norm $\norm{x}_p:= (\sum_i \abs{x_i}^p)^{1/p}$.
% Euclidean norm $l_2$ norm;
% Spectral (operator) norm $\norm{\vbx}_{\infty}$;
The Euclidean norm of a matrix $A$ is defined as $\norm{A}_2:=\sqrt{\Tr(A^\dagger A)}$.
The trace norm of $A$ is defined as $\norm{A}_{\Tr}\equiv\norm{A}_{1}:=\Tr(\abs{A})\equiv\Tr(\sqrt{A^\dagger A})$.
Correspondingly, trace distance between two density matrices is $d_{\tr}(\dm,\dm') : = \frac{1}{2} \norm{\dm-\dm'}_1$.
% The fidelity between two density matrices is defined as $F(\dm,\dm') : = \Tr(\sqrt{\sqrt{\dm}\dm'\sqrt{\dm}})\equiv\norm{\sqrt{\dm}\sqrt{\dm'}}_1$.
% Uhlmann fidelity
% linear fidelity or overlap $F(\dm,\dm'):=\tr(\dm\dm')$
} \cite{gurvitsClassicalDeterministicComplexity2003} \cite{gharibianStrongNPHardnessQuantum2009},
while there exists a quasipolynomial-time algorithm with respect to certain norm \cite{brandaoQuasipolynomialtimeAlgorithmQuantum2011}.
% \begin{theorem}[\cite{gurvitsClassicalDeterministicComplexity2003}]
% % The problem of determining whether a given quantum state is entangled lies at the heart of quantum information processing, which is an NP-hard problem in general.
% \nameref{prm:weak_membership problem_for_separability} is NP-Hard for $\epsilon=1/\poly(D)$ with respect to Euclidean \nameref{def:norm} and trace norm.
% \cite{ioannouComputationalComplexityQuantum2007}
% \cite{dohertyCompleteFamilySeparability2004}
% while there exists a quasipolynomial-time algorithm with respect to $\norm{\cdot}_{\locc}$ (and $\norm{\cdot}_2$?) \cite{brandaoQuasipolynomialtimeAlgorithmQuantum2011}.
% % \begin{itemize}
% % \item \textbf{Input}: ??
% % \item \textbf{Output}: ??
% % \end{itemize}
% \label{thm:classical_hardness}
% \end{theorem}
A notable numeric method is the powerful criteria called $k$-symmetric extension hierarchy based on SDP \cite{dohertyCompleteFamilySeparability2004} \cite{ioannouComputationalComplexityQuantum2007} \cite{navascuesPowerSymmetricExtensions2009},
which also becomes computationally intractable with growing $k$.
The quantum hardness of a series of related separability testing problems were studied in the framework of quantum interactive proofs \cite{gutoskiQuantumInteractiveProofs2015}.
Nevertheless, these hardness results do not rule out the possibility to solve it efficiently with a stronger promise (approximation) or by machine learning (heuristic) techniques powered by data.
% Though it sounds easy
% \subsubsection{Direct entanglement detection}
% method only works for bipartite (not clear how to generalize to multipartite).
% While the left-hand-side is ...
% % \begin{equation}
% % \pi: = (1,2,\dots,m).
% % \end{equation}
% We can estimate the quantities $\Re [\Tr(\dm_1\cdots \dm_m)]$ and $\Im [\Tr(\dm_1\cdots \dm_m)]$ respectively.
% to generalize the estimation beyond single-qubit states, we can either increase the width or the depth of the circuit described above.
% solve the quantum problem by quantum, without full tomography.
% \begin{definition}[entanglement spectroscopy]\label{def:entanglement_spectroscopy}
% the concept of the entanglement spectrum which is the energy spectrum of the ``entanglement Hamiltonian" $\hamiltonian_E$ defined through $\dm_A = \exp(−\hamiltonian_E )$. They pointed out that the largest eigenvalues of $\dm_A$ [30] contain more universal signatures than the von Neumann \nameref{def:entropy} or $S_2$ alone. (SWAP trick, Quantum phase estimation)
% % Entanglement spectroscopy on a quantum computer
% \cite{johriEntanglementSpectroscopyQuantum2017}:
% \end{definition}
% \begin{theorem}[\cite{quekMultivariateTraceEstimation2022}]\label{thm:multivariate_trace}
% multivariate \nameref{prm:trace_estimation} can be implemented in \textbf{constant depth}, with only linearly-many controlled two-qubit gates
% copies $\bigO(\epsilon^{-2}\log( \delta^{-1}))$ ...
% \end{theorem}
% \begin{theorem}[\cite{quekMultivariateTraceEstimation2022}]\label{thm:multivariate_trace}
% Let $\qty{\dm_1,\dots,\dm_m}$ be a set of $p$-qubit states, and fix $\epsilon > 0$ and $\delta \in (0,1)$.
% There exists a random variable $\hat{T}_p$ that can be computed using $\bigO(\epsilon^{-2}\log( \delta^{-1}))$ repetitions (copies) of a \textbf{constant-depth} quantum circuit consisting of $\bigO(mp)$ three-qubit gates, and satisfies
% \begin{equation}
% \probability\qty(\abs{\hat{T}_p - \Tr(\dm_1\cdots\dm_m)}\le \epsilon) \ge 1 -\delta.
% \end{equation}
% with only linearly-many controlled two-qubit gates
% and a linear amount of classical pre-processing.
% \end{theorem}
% Multivariate trace estimation in constant quantum depth
% \cite{quekMultivariateTraceEstimation2022}
% \begin{algorithm}[H]
% \DontPrintSemicolon
% \SetKwInOut{Input}{input}
% \SetKwInOut{Output}{output}
% \Input{(copies of) density matrix (graph state?) $\dm$,...}
% \Output{spectrum of entanglement Hamiltonian}
% \BlankLine
% \For{ $i = 1,2, \ldots, m$} {
% GHZ \tcp*{prepare GHZ}
% parallel \tcp*{estimate real and imaginary part respectively}
% % \tcc{comment in a new line}
% {\Return $\lambda$ }
% }
% \Return smallest eigenvalue of $\dm_A$
% \caption{\nameref{def:entanglement_spectroscopy} by ... quantum trace estimation}
% \label{alg:entanglement_spectroscopy}
% \end{algorithm}
% \begin{remark}[\cite{quekMultivariateTraceEstimation2022}]
% We remark that, an alternative way to estimate $\Tr( \dm^k )$ for each $k \in [m]$ is by using the method of classical shadows to obtain `classical snapshots' of $\dm$ that can be linearly combined to obtain a classical random variable whose expectation is $\Tr( \dm^k )$ (see Supplementary Material Section 6 of \cite{huangPredictingManyProperties2020}).
% However, it is \textbf{unclear to us if this method would offer savings in the quantum resources required, as the total number of times the quantum circuit needs to be run in the data acquisition phase should scale with the variance of the corresponding estimator}.
% We do not know of a concise expression for this variance for arbitrary $m$. Indeed, calculating it for just a single value of $m$ ($m = 2$) required four pages of calculations in \cite{huangPredictingManyProperties2020}.
% \end{remark}
\subsubsection{Entanglement witness based on fidelity}\label{sec:entanglement_witness}
% \subsubsection{Entanglement spectroscopy via quantum trace estimation}
% \nameref{prm:trace_estimation}
A (realistic) variant of \nameref{prm:separability} is how to determine \nameref{def:bipartite_separable} given copies of an unknown state (from experiments) rather than its full density matrix.
In this case, the sample complexity should be considered besides computational complexity.
Since the input to this problem is quantum data (states), directly estimating spectrum or entanglement monotone functions of the reduced density matrix $\rho_A:=\Tr_B(\rho_{AB})$ \cite{ekertDirectEstimationsLinear2002} \cite{horodeckiDirectDetectionQuantum2002} \cite{johriEntanglementSpectroscopyQuantum2017}, e.g., purity, negativity, and entanglement entropy, by quantum measurement and circuits \cite{wang16qubitIBMUniversal2018} \cite{quekMultivariateTraceEstimation2022} is a good option (without fully recovering density matrices).
% Rather than qualitatively determining bi-separability, there are measures to quantify entanglement, such as entanglement entropy, negativity and purity of reduced density matrices.
% $E(\Psi_{AB}):= S(\dm_A) = -\Tr(\dm_A \log \dm_A)$
% fully entangled graph state (ring of 16 qubits) IBM by measuring negativity
% \footnote{
% The well-known identity (related to the replica trick originating in spin glass theory)
% % \begin{equation}
% $\Tr(\U^{\pi} (\dm_1 \otimes \cdots \otimes \dm_m) ) =
% \Tr(\dm_1\cdots \dm_m) $
% % \end{equation}
% where the RHS is the multivariate trace and $\U^{\pi}$ is a unitary representation of the cyclic shift permutation.
% }.
However, this line of work does not provide capability beyond theoretical complexity bounds (though usually efficient for the one-side test).
% - deducing the full set of eigenvalues of $\dm_A$.
% The smallest eigenvalue of diagnoses whether $\psi_{AB}$ is separable or entangled .
% Another research direction about determining multipartite entanglement is using entanglement witnesses,
% which requires detailed priori knowledge about the state.
% This is a key distinction from the \nameref{prm:separability} problem.
The problem we study here is another variant:
\begin{problem}[entanglement detection with prior knowledge]\label{prm:entanglement_detection}
% Entanglement witness, Entanglement detection, Certify entanglement
Given copies of an unknown state $\dm$ (from experiments) that is promised either (i) $\dm\in\separableset_{\bi}$
or (ii) in `proximity' of a target $\ket{\psi_{\target}}$,
% (i.e., possesses `useful' entanglement such as \nameref{def:gme}, \nameref{def:full_entanglement}, depth ...)
% \footnote{
% For fidelity witness, promise that the state is either
% (1) the fidelity $ \Tr(\dm_{\prepare}\op{\psi_{\target}}) < \alpha$ implies $\dm\in\separableset_{\bi}$ or entangled;
% (2) the fidelity $ \Tr(\dm_{\prepare}\op{\psi_{\target}}) \ge \alpha$ implies $\dm\notin\separableset_{\bi}$
% (In other words, the trace distance $\norm{\dm_{\prepare}-\op{\psi_\target}}_1 \le \sqrt{1-\alpha}$)
% }
determine which is the case.
% \begin{itemize}
% \item \textbf{Input}: an unknown state $\dm$ (from experiments) is promised in `vicinity' of a target $\ket{\psi}_{\target}$,
% \item \textbf{Output}: $\dm$ possesses `useful' entanglement
% (\nameref{def:gme}, full entanglement, $\separableset_b^\ppartition$, intactness, depth ...) or not
% \end{itemize}
\end{problem}
The typical scenario for this problem is to prepare a pure entangled state $\ket{\psi_\target}$ in experiments and would like to detect (verify) it as true multipartite entangled.
While the preparation is not perfect,
it is reasonable to assume that the prepared mixed state $\dm_{\prepare}$ is in the proximity of the target state,
that is, $\ket{\psi_{\target}}$ undergoes noise channels restricted to white noise and local rotation (unitary).
% bit/phase-flip error, or random local unitary.
% \begin{problem}[Certify entanglement]
% Multipartite entanglement-structure detection
% \begin{itemize}
% \item \textbf{Input}: an (actual) state $\dm'$ from experiment that is close to a \textbf{known/target} (general multipartite) state $\ket{\psi}$,
% certain partition?
% \item \textbf{Output}: the certified lower-order entanglement among several subsystems could be still useful for some quantum information tasks.
% entanglement structure (intactness, depth)
% \end{itemize}
% \end{problem}
This problem is supposed to be solved efficiently because we have a much stronger promise than the separability problem.
The usual method for it is constructing an observable $W$ called \emph{entanglement witness} such that
\begin{equation}
\Tr(\ew\dm_{\bi}) \ge 0 \text{ and }
\Tr(\ew\op{\psi_{\target}}) < 0
% \Tr(\ew\dm) \ge 0 , \forall \text{ separable }\rho,\;
% \Tr(\ew\dm) < 0 , \text{ for some entangled including } \rho_{\target}
\label{eq:witness}
\end{equation}
which means that the witness $W$ has a positive expectation value on all separable states.
Hence, a negative expectation value implies the presence of entanglement (GME).
It can be proved, for every entangled state, a witness can always be constructed,
but no entanglement witness works for all entangled states \cite{heinosaariMathematicalLanguageQuantum2011}.
So, entanglement witness only provides a one-side test for separability.
% see \cref{fig:entangle} for relations.
% \subsubsection{Bell inequality}
For instance, the Bell (CHSH) inequalities originally proposed to rule out local hidden variable models,
can be regarded as an entanglement witness for many 2-qubit entangled states \cite{terhalBellInequalitiesSeparability2000}.
% \begin{definition}[Bell inequality]\label{def:bell_inequality}
% \begin{definition}[CHSH inequality]\label{def:chsh_inequality}
% Bell inequalities for graph states $\abs{\sum_{\sigma\in S} \expval{\sigma}}\le C?$...
% \end{definition}
A Bell inequality can be considered as a linear combination of Pauli observables $\ew_{\bellineq}:=\vb{w}_{\bellineq}\cdot\vb{\pob}_{\bellineq}$
such that only entangled states $\dm$ have $\abs{\Tr(\dm\ew_{\bellineq})}$ greater than a threshold
% $\ew_{\bellineq}:=\vb{w}_{\bellineq}\cdot\vb{\pob}_{\bellineq}$
% where $\vb{\pob}_{\bellineq}=\qty(\identity, XX,XZ,ZX,ZZ )$ and $\vb{w}_{\bellineq}$ are coefficients
\footnote{
The Bell (CHSH) inequality (witness):
$\vb{\pob}_{\chsh}=\qty(\identity, a b, a b', a' b, a' b' )$ with
$a = \pz, a' = \px, b = (\px-\pz)/\sqrt{2}, b = (\px+\pz)/\sqrt{2}$
% \begin{equation}
% \label{eq:chsh}
% \end{equation}
and $\vb{w}_{\chsh} = \qty(\pm 2, 1, -1, 1, 1)$
}.
% \begin{equation}
% \ew_{ml} := w_0 + w_1 a_0 b_0 + w_2 a_0 b_0' + w_3 a_0' b_0 + w_4 a_0' b_0'
% \quad, \vb{w} = \qty{\pm 2, 1, -1, 1, 1}
% \end{equation}
% \begin{remark}
% Not surprisingly, even for two-qubit systems there exist entangled states which do not violate any Bell inequality.
% \cite{maTransformingBellInequalities2018}
% \subsubsection{universal?}
% \begin{definition}[entanglement witness]\label{def:entanglement_witness}
% Given a specific entangled state $\ket{\psi_\target}$, its entanglement witness $\ew$ is an obseverable such that
% \begin{equation}
% \Tr(\ew\dm_{\bi}) \ge 0 \text{ and }
% \Tr(\ew\op{\psi_{\target}}) < 0
% \end{equation}
% \end{definition}
% Entanglement verification. Fidelities with pure target states can also serve as (bipartite) entanglement witnesses. For every (bipartite) entangled state $\dm$, there exists a constant $\alpha$ and an observable $\ob = \op{\psi}$ such that $\Tr(\ob \dm ) > \alpha \ge \Tr(\ob \dm_s )$, for all (bipartite) separable states $\dm_s$. Establishing $\Tr(\ob \dm ) > \alpha$ verifies the existence of entanglement in the state $\dm$. Any $\ob = \op{\psi}$ that satisfies the above condition is known as an entanglement witness for the state $\dm$.
% \subsubsection{Fidelity, projector-based witness, and stabilizer state}
While various methods for constructing an entanglement witness exist, the most common one is based on the fidelity between a prepared state $\dm_{\prepare}$ to the target (pure entangled) state $\ket{\psi_{\target}}$
% The usual way to construct entanglement witnesses using the knowledge of this state is
\begin{equation}
\ew_{\psi} = \alpha\identity - \op{\psi_\target}
% ,\; \alpha = \max_{\dm_{\bi}} \Tr(\dm_{\bi}\op{\psi_{\target}})
\label{eq:entanglement_witness}
\end{equation}
where $\alpha = \max_{\dm_{\bi}} \Tr(\dm_{\bi}\op{\psi_{\target}})$ is the maximal fidelity between separable states and the target entangled state such that for every separable state $\Tr(\dm_{\bi}\ew_{\psi})\ge 0$.
This kind of fidelity witness classifies states as either
(1) the fidelity $ \Tr(\dm_{\prepare}\op{\psi_{\target}}) \le \alpha$; or
(2) the fidelity $ \Tr(\dm_{\prepare}\op{\psi_{\target}}) > \alpha$ implies $\dm\notin\separableset_{\bi}$
\footnote{In other words, the trace distance $\norm{\dm_{\prepare}-\op{\psi_\target}}_1 < \sqrt{1-\alpha}$ because the fidelity and trace distance are related by the inequalities
$1-F\le d_{\tr}(\dm,\dm') \le \sqrt{1-F^2}$ (c.f. \nameref{prm:weak_membership problem_for_separability})}.
For instance, assume the target state is $\ket{\ghz}:=\frac{1}{\sqrt{2}}(\ket{0}^{\otimes n} + \ket{1}^{\otimes n}$),
the maximal overlap between GHZ and bi-separable states is $1/2$,
such that the witness \cref{eq:entanglement_witness} with $\alpha=1/2$ certifies tripartite entanglement
\cite{acinClassificationMixedThreequbit2001}.
% for any state $\dm_s$ with only bipartite entanglement, $\Tr(\ob \dm_s)\le 0.5$ ($\ge 0.5$ certifies tripartite entanglement),
% while for any state $\dm_s$ with at most $W$-type entanglement, $\Tr(\ob \dm_s)\le 0.75$.
% $\Tr(\ob \dm)> 0.75$ certifies that $\dm$ has $\ghz$-type entanglement \cite{acinClassificationMixedThreequbit2001}.
We call \cref{eq:entanglement_witness} as projector-based fidelity witness \cite{bourennaneWitnessingMultipartiteEntanglement2004}.
% \cite{huangPredictingManyProperties2020}
% \begin{remark}
% However, it is generally difficult to evaluate the quantity $\Tr(\dm_{\prepare} \op{\psi_{\target}})$ by the direct projection, because the target state is entangled.
In order to effectively measure a witness in an experiment, it is preferable to decompose the projector term into a sum of locally measurable observables such as
\footnote{
$\ew_{\ghz_3} = \frac{1}{8} \qty( 3*III - \px\px\px - \perm(I\pz \pz ) + \perm(XYY))$
% $\ew_{\ghz_3} = \frac{1}{8} \qty( 3*III - \px\px\px - \pz \pz I - \pz I\pz - I\pz \pz + XYY + YXY + YYX)$
% $\ew_{\ghz_3} := \frac{3}{2} I - \px\px\px - \frac{1}{2} \qty(\pz \pz I+ \pz I\pz + I\pz \pz )$
where $\pz \pz I\equiv \pz \otimes\pz \otimes I$ and $\perm(I\pz \pz )\equiv \pz \pz I + \pz I\pz + I\pz \pz$ for readability.
}.
% \begin{equation}
% \ew_{\ghz_3} := \frac{3}{2} I - \px\px\px
% - \frac{1}{2} \qty(\pz \pz I+ I\pz \pz + \pz I \pz )
% \label{eq:ghz_local}
% \end{equation}
% The number of local measurements in these decompositions seems to increase exponentially with the number of qubits.[??]
Meanwhile, for graph states (stabilizer states, i.e., a large class of entanglement states),
a witness can be constructed by very few local measurement settings (LMS) \footnote{For example, the observables $\pz \pz I$, $\pz I\pz$, and $I\pz \pz$ can be measured by one local measurement setting $\pz\pz\pz$.} \cite{tothDetectingGenuineMultipartite2005,tothEntanglementDetectionStabilizer2005,zhouDetectingMultipartiteEntanglement2019}
and implemented in experiments
% Related experiments: photonic implementation with a few qubits (generation, verification)
\cite{luEntanglementStructureEntanglement2018}
% optical lattice (homogeneous, restricted measurement, detect GME, nonstabilizer)
\cite{luEntanglementStructureEntanglement2018,zhouSchemeCreateVerify2022},
but non-local measurements are usually required for non-stabilizer cases (e.g., W state) \cite{zhangEfficientEntanglementGeneration2021,zhuMachineLearningDerivedEntanglement2021}.
% $C$ is hard to compute? non-stabilizer state? SWAP?
% \textbf{difficulty}: multi($n$)-partite, high-dimensional (qudit) \cite{sciaraUniversalPartiteLevel2019}, pure/mixed state, with/out prior knowledge, universal?, certain partition
\section{End-to-end entanglement detection protocol}\label{sec:protocol}
\subsection{Motivation: Beyond fidelity witness}
% \subsubsection{Motivation: Beyond fidelity witness}
In most studies of fidelity witness, the robustness measure of a fidelity witness is its tolerance to white noise:
\begin{equation}\label{eq:white_noise}
\dm
= (1-p_{\noise}) \op{\psi_{\target}} + p_{\noise} \frac{\identity}{2^{n}}
\end{equation}
where the limit of white noise (i.e., maximal $p_{\noise}$ s.t. $\Tr(\dm\ew_{\psi})<0$) indicates the robustness of the witness.
In general, there are entangled states mixed with large white noise that cannot be detected by conventional methods.
% the largest noise tolerance $p_{\text{limit}}$ just related to the \textbf{chromatic number} of the graph ($k$ local measurements) \cite{zhouDetectingMultipartiteEntanglement2019}.
% \begin{theorem}
% k local measurements. Here, k is the chromatic number (minimal \nameref{exm:colorable}) of the corresponding graph, typically, a small constant independent of the number of qubits.
% \end{theorem}
% There is a tradeoff between white noise tolerance (robustness) and efficiency (number of measurements).
For example, the maximally-entangled Bell state can maximally violate the CHSH inequality,
but Bell states that mixed with white noise doesn't violate the CHSH inequality when $ 1- 1/ \sqrt{2} < p_{\noise}<2/3 $ despite they are still entangled in this regime.
% \begin{table}[!ht]
% \centering
% \begin{tabular}{c|c|c|c|c|c|c|c|c}
% & $\ket{\ghz_3}$ & $\ket{W_3}$ & $\ket{CL_3}$ & $\ket{\psi_2}$ & $\ket{\D_{2,4}}$ & $\ket{\ghz_n}$ & $\ket{W_n}$ & $\ket{G_n}$ \\
% \hline
% maximal overlap $\alpha$ & $1/2$ & $2/3$ & $1/2$ & $3/4$ & $2/3$ & $1/2$ & $(n-1)/n$ & $1/2$ \\
% maximal $p_{\noise}$ & 4/7 & 8/21 & 8/15 & 4/15 & 16/45 & $1/2 \cdot (1-1/2^n)^{-1}$ & $1/n \cdot (1-1/2^n)^{-1}$ & $1/2 \cdot (1-1/2^n)^{-1}$ \\
% \# local measurements & 4 & 5 & 9 & 15 & 21 & $n+1$ & $2n-1$ & depend on graphs \\
% \hline
% \end{tabular}
% \caption{Results on local decompositions of different entanglement witnesses for different states. \cite{guhneEntanglementDetection2009}}
% \label{tab:summary_fidelity_witness}
% \end{table}
For 3-qubit GHZ states mixed with white noise, we can analytically compute the white noise threshold for NPT (implies bipartite entanglement):
when $p_{\noise}<0.8$, the states cannot be \nameref{def:bipartite_separable} with respect to any partition (that is \nameref{def:full_entanglement}).
However, the conventional fidelity witness only detects \nameref{def:gme} when $p_{\noise}<4/7$ for GHZ states \cite{guhneEntanglementDetection2009}.
So, it would be practically interesting to have a witness for this white noise regime $p_\noise\in[4/7,0.8)$
\footnote{The corresponding white noise regime for W state is $p_\noise\in[8/21,0.791)$}
that beyond the capability of conventional fidelity witnesses.
% \begin{example}[inequality]
% To be more specific, the maximally-entangled state, such as $\ket{\psi_0} = ( \ket{00} - \ket{11} ) / \sqrt{2}$ for a pair of qubits, can maximally violate the CHSH inequality.
% However, this tool fails under the circumstances of noise, in the form of a quantum channel. After passing through a depolarizing channel, the resulting state,
% \begin{equation}
% \dm_{wn} = (1-p_{\noise}) \op{\psi_-} + p_{\noise} \frac{\identity}{4}
% \end{equation}
% where $0 \le p_{\noise} \le 1$, violates the \nameref{def:chsh_inequality} only if $p_{\noise} < 1- 1/ \sqrt{2}$. However, the state is entangled when $p_{\noise} < 2/3$.
% \end{example}
% Bell inequalities are not suited to this aim in general. Multiseparable and biseparable states violate known Bell inequalities less than $n$-partite GHZ states. However, for $n > 3$ there exist even pure $n$-partite entangled states with a lower violation than biseparable states \cite{bourennaneWitnessingMultipartiteEntanglement2004}.
% Mermin's inequality?
% \end{remark}
% \begin{proposition}[Section 6.3 of \cite{heinosaariMathematicalLanguageQuantum2011}]
% A state $\dm$ is separable iff $\forall \ew,\Tr[\dm \ew]\ge 0$.
% Corollary, a state $\dm$ is entangled $\iff$ $\exists \ew,\Tr[\dm \ew]<0$.
% There is no entanglement witness that detects all entangled states.
% % any individual entanglement witness leaves many entangled states undetected.
% \end{proposition}
% \cite{sciaraUniversalPartiteLevel2019}
% % \begin{remark}[universal entanglement witness]
% % Since the witness tests for a specif state, a successful measurement of the operator also provides information about the state structure and phase, rather than only confirming the presence of entanglement.
% For example, a witness specifically designed for a four-qubit compact cluster state [16] confirms, when its expectation value is negative, the presence of that particular state having a very specific density function, while a positive measured expectation value of that operator only provides information that the tested state is not a compact cluster state.
% Indeed, the same witness, if applied to a four-qubit linear cluster or GHZ [17] states, would result in a positive measured expectation value, even though these two states are both highly entangled [17, 18].
% % Bell’s theorem without inequalities, Am. J. Phys. 58, 1131 (1990).
% % G. Tóth and O. Gühne, Entanglement detection in the stabilizer formalism
% Hence, a witness is a threshold test that can only detect the presence of a specific state.
% In contrast to an entanglement monotone (e.g. the entanglement entropy [6]), which determines the amount of entanglement, a witness cannot be used to quantify entanglement.
% % \end{remark}
% \end{remark}
% graph state, stabilizer \cite{zhouDetectingMultipartiteEntanglement2019}
% \begin{proposition}[\cite{zhouDetectingMultipartiteEntanglement2019}]
% Given a graph state $\ket{G}$ and a partition $\mathcal{P}=\qty{A_i}$, the \nameref{def:fidelity} between $\ket{G}$ and any \nameref{def:fully_separable} is upper bounded by
% \begin{equation}
% \Tr(\op{G} \dm_f) \le \min_{\qty{A,\bar{A}}} 2^{-S(\dm_A)}
% \end{equation}
% where $S(\dm_A)$ is the von Neumann \nameref{def:entropy} of the reduced density matrix $\dm_A=\Tr_{\bar{A}}(\op{G})$.
% \end{proposition}
% generalize \cite{zhangEfficientEntanglementGeneration2021}
% stabilizer state, neural network state \cite{gaoEfficientRepresentationQuantum2017}?
% \begin{proposition}[Entanglement witness for graph state]
% \nameref{def:bell_inequality}
% \begin{equation}
% \ew = \frac{C}{2^N} \identity_V - \op{G}
% \end{equation}
% Let $\ket{G}$ be a graph state corresponding to a connected graph. Then
% \begin{equation}
% \ew_1^{ab} = \identity_V - K_a - K_b
% \end{equation}
% is an entanglement witness for the $\ket{G}$ that detects entanglement in the reduced state $\dm_G^A (A=N_a\cup N_b \cup \qty{a,b})$ with only two measurement settings and thus can rule out full separability of the total graph state???.
% The entanglement witness
% \begin{equation}
% \ew_2 = (N-1)\identity_V - \sum_{a\in V} K_a
% \end{equation}
% detects \nameref{def:gme}.
% \end{proposition}
Other than white noise, an other typical noise that happens in (photonic) experiments is coherent noise, such as local rotations.
% while entanglement property is not affected by local unitary.
% other noise (depolarization)? e.g., flip error, phase error?, local, random unitary transformation?
Take $n$-qubit GHZ state as an example, unconscious phase accumulation and
rotation on the first control qubit can be modeled as
\cite{zhouEntanglementDetectionCoherent2020}
% $\ket{\psi_\phi}=\frac{1}{\sqrt{2}}\qty(\ket{000}+e^{\ii \phi}\ket{111})$;
\begin{equation}
\ket{\ghz(\phi,\theta)}=
\cos\theta\ket{0}^{\otimes n}+e^{\ii \phi}\sin\theta\ket{1}^{\otimes n}.
\label{eq:coherent_noise}
\end{equation}
In a certain noise regime (see Fig. 3 of \cite{zhouEntanglementDetectionCoherent2020}), $\ket{\ghz(\phi,\theta)}$ cannot be detected by conventional fidelity witness because the coherent noise diminishes the fidelity but not change entanglement property.
% (post measurement processing can remedy)
\begin{figure*}[!ht]
\centering
\subfloat{%
\includegraphics[width=1.0\columnwidth]{./fidelity_witness_compare_2_long.png}%
}\hfill
\subfloat{%
% \subfloat[\label{subfig:b}]{%
\includegraphics[width=0.9\columnwidth]{./faithfulness_2_qubit.png}%
}
% \begin{subfigure}{0.49\textwidth}
% \centering
% % \includegraphics[width=.9\linewidth]{.pdf}
% \includegraphics[width=.9\linewidth]{./Code/fidelity_witness_compare_2_long.png}
% \end{subfigure}
% \begin{subfigure}{0.45\textwidth}
% \centering
% \includegraphics[width=.9\linewidth]{./Code/faithfulness_2_qubit.png}
% \end{subfigure}
\caption{Examples of the entanglement states cannot be detected by conventional fidelity witnesses. (a) GHZ states with coherent noise sampled with $\theta\in[0,\pi/3]$ and $\phi\in[0.5\pi,0.6\pi]$ cannot be detected by the GHZ projector fidelity witness $W_{\ghz}$ \cref{eq:entanglement_witness}. Entangled states should be on the left of the dashed vertical line, i.e., have a negative expectation value of the witnesses $\Tr(\dm\ew)$. (b) Similarly, full entanglement of W states with large white noise $p_{\noise}\in[8/21,0.5]$ cannot be detected by $W_{\text{w}}$. And we can see W states with white noise has $\Tr(\dm_{\text{W}}\ew_{\ghz})>0$, vice versa. (c) Unfaithfulness of 2-qubit states: $10^4$ randomly sampled 2-qubit states are categorized according to the minimal eigenvalue of partial transpose $\dm_{AB}^{\T_A}$ and the maximal eigenvalue of $\chi_2(\dm_{AB})$.}
\label{fig:conventional_witness}
% \caption{(a) compare different methods: Bell inequality, witness, ML ansatz; different white noise limit, unfaithful state; }
\end{figure*}
To formally characterize the cases beyond fidelity witness, Weilenmann et. al \cite{weilenmannEntanglementDetectionMeasuring2020} \cite{huOptimizedDetectionHighDimensional2021} coined the term \emph{unfaithful states}
which systematically analyzes a 2-qudit entangled state mixed with white noise that cannot be detected by fidelity witness.
They found that for $d \ge 3$ that almost all states in the Hilbert space are unfaithful.
% For $d > 5$, the authors find that all states they generated are entangled but at the same time unfaithful, regardless of what metric is used to sample them.
% faithful states are useful for quantum teleportation.
% This shows that fidelity-based entanglement witnesses detect entanglement that is useful for teleportation.
Subsequently, G\"{u}the et. al \cite{guhneGeometryFaithfulEntanglement2021} \cite{riccardiExploringRelationshipFaithfulness2021} gave a formal definition:
% the faithfulness of a twoqubit state, allowing for a physical interpretation of unfaithful two-qubit states as exactly those entangled states that are not useful for teleportation.
% \begin{definition}[unfaithful state]\label{def:unfaithful_state}
% informally, unfaithful state are entangled states but cannot be detected by fidelity witness.
a 2-qudit state $\dm_{AB}$ is faithful if and only if there are local unitary transformations $\U_A$ and $\U_B$ such that
$\expval{\U_A\otimes\U_B \dm_{AB} \U_A^\dagger \otimes\U_B^\dagger}{\phi^+}> \frac{1}{d}$.
% \begin{equation}
% \expval{\U_A\otimes\U_B \dm_{AB} \U_A^\dagger \otimes\U_B^\dagger}{\phi^+}
% > \frac{1}{d}.
% \end{equation}
% \end{definition}
Consequently, they found a necessary and sufficient condition for 2-qubit unfaithfulness:
% determined by the spectrum of
a 2-qubit state $\dm_{AB}$ is faithful if and only if the maximal eigenvalue of
\begin{equation}
\mathcal{X}_2( \dm_{AB})=\rho_{AB}-\frac{1}{2}(\dm_{A}\otimes I + I \otimes \dm_{B})+\frac{1}{2} I \otimes I
\label{eq:unfaithful_2qubit}
\end{equation}
is larger than 1/2.
We can see in (c) of \cref{fig:conventional_witness}, even for 2-qubit states, nonnegligible portion of randomly sampled states are unfaithful but still entangled (NPT).
% \begin{figure}[!ht]
% \centering
% \includegraphics[width=.9\linewidth]{./Code/faithfulness_2_qubit.png}
% % \caption{PPT criterion (2-qubit random density matrix)}
% \caption{Unfaithfulness of 2-qubit states: $10^4$ randomly sampled 2-qubit states are categorized according to the minimal eigenvalue of partial transpose $\dm_{AB}^{\T_A}$ and the maximal eigenvalue of $\chi_2(\dm_{AB})$.}
% \label{fig:unfaithfulness}
% \end{figure}
Although there are variants of witness, such as nonlinear witness \cite{guhneNonlinearEntanglementWitnesses2006} and post-processing \cite{zhanDetectingEntanglementUnfaithful2021}, designed to remedy the shortcomings of conventional fidelity witness respectively,
% \cite{huOptimizedDetectionHighDimensional2021}
it would be meaningful in practice to find a generic method to construct witnesses (classifiers) for \nameref{prm:entanglement_detection}.
% Moreover, they can only be applied to bipartite systems, which means they cannot be generalized to detect genuine entanglement in multipartite states.
Machine learning techniques suit the needs well because supervised learning can be regarded as a powerful nonlinear post-processing tool.
\subsection{Training a generic witness via kernel SVM}
% \section{End-to-end entanglement detection protocol}\label{sec:protocol}
% \section{Classical-quantum hybrid, end-to-end detection protocol}
% \section{Classical, data-powered, and quantum algorithms}
% In this paper, we focus on the entanglement structure dectection for graph states.
One basic task in classical machine learning (ML) is binary classification,
such as cat/dog image classification.
In this case, the input to a ML algorithm is a (training) dataset $\qty{(\vbx^{(i)},y^{(i)})}_{i=1}^m$ consists of $m$ data points,
where each data point is a pair of feature vector $\vbx\in \realnumber^d$ of $d$ features and its label $y\in\qty{-1,1}$.
For example, the feature $\vbx$ of an image is a flattened vector of all pixel values and the label $y=-1$ for \textsc{cat} images ($1$ for \textsc{dog}).
It is clear that \nameref{prm:separability} or \nameref{prm:entanglement_detection} problem are exactly such binary classification problems where each quantum state has a binary label, such as either `\entangled' or `\separable'.
The features $\vbx$ of a quantum state $\dm$ can be the entries of its density matrix, or more realistically, the expectation values of selected observables.
% formally in \cref{sec:svm}
% The quantum extension of this problem (classficiation/pattern recognition) is to replace the data points with density matrices of quantum states .
% Specifically, a quantum state classifier outputs a label associated with the state, such as, $\entangled$ or `bi-separable'.
With the surge of research on ML,
classification tasks related to entanglement have been performed by ML algorithms.
Lu et. al \cite{luSeparabilityEntanglementClassifierMachine2018}
trained a (universal) \nameref{prm:separability} classifier by classical neural network
where features of $\vbx$ are the entries of density matrices.
% where input dataset are (randomly sampled) density matrices with labels?.
For the similar purpose, Ma and Yung \cite{maTransformingBellInequalities2018} generalized Bell inequalities to a Bell-like ansatz $\ew_{\ml}:=\vbw_{\ml}\cdot\vb{\pob}_{\bellineq}$ where the optimal weights $\vbw_{\ml}$ are obtained via optimizing a neural network.
% an ansatz for \nameref{def:entanglement_witness}
% Different Bell inequalities can be regarded as entanglement witness for different types of entanglement in a multi-party entangled state.
% (graph state entanglement)
And they found the tomographic ansatz
\begin{equation}
\Tr(\dm\ew_{\ml}) \equiv\expval{\ew_{\ml}} \equiv
\vbw_{\ml} \cdot \expval{\vb{\pob}_{\sigma}},
% \; \forall \sigma \in \qty{I,X,Y,Z}^n
% \sum_{k_1,k_2,\dots,k_n} w_{k_1,k_2,\dots,k_n} \bigotimes_i^n \hat{\sigma}^{(k_i)}
% ,\quad \hat{\sigma} \in \qty{\sx,\sy,\sz,\identity_{2\times 2}}
% \sum_{\vb{p}\in \qty{I,X,Y,Z}^n} w_{\vb{p}} \bigotimes_i^n \vb{p}_i
% \hat{\sigma}^{(\vb{p}_i)}
\label{eq:tomographic_ansatz}
\end{equation}
not only has better performance than the Bell-like ansatz,
also required \cite{luTomographyNecessaryUniversal2016} for training a universal \nameref{prm:separability} classifier,
where the feature vector $\vbx_{\dm,\bmsigma}:=\expval{\vb{\pob}_{\sigma}}$ denotes the expectations of all $4^n$ Pauli observables
\footnote{
Denote $\pob_{\sigma}\in \qty{I,X,Y,Z}^{\otimes n}$ for a Pauli observable.
Denote $\vbx_{\dm,\bmsigma}:=(\Tr(\dm\pob_{\sigma_1}),\dots,\Tr(\dm\pob_{\sigma_M}))$ for a vector of expectations of $M$ Pauli observables $\bmsigma\subseteq \qty{I,X,Y,Z}^n$ measured on $\dm$.
}.
% c.f. \nameref{prm:full_tomography}
It is worth noting that training such a universal classifier for high-dimensional systems needs a large training dataset and long time if the gap between two state sets is small.
\begin{figure*}[!ht]
\centering
\centering
\includegraphics[width=.56\linewidth]{schematic.png}
% \includegraphics[width=.9\linewidth]{gme.jpg}
% . \includegraphics[width=.7\linewidth]{witness.png}
% \includegraphics[width=.8\linewidth]{sep.jpg}
% \includegraphics[width=.9\linewidth]{ppt.jpg}
\caption{Schematic diagram for different entanglement detection methods: the colored ellipses with dotted boundary indicate the vicinity (white noise) of certain entangled states such as GHZ, and W states. Conventional fidelity witnesses for different states are depicted by colored dashed lines (hyperplanes in feature space). An entangled state with large white noise or coherent noise (local rotation depicted by a curve) cannot be detected by conventional fidelity witnesses. SVM without a nonlinear kernel is a hyperplane separating two sets of colored dots (synthetic dataset). The data points on the boundaries (dashed black lines) are called support vectors. The distance between the SVM hyperplane and boundary is the margin to be minimized via optimization. The PPT criterion is a nonlinear but one-side classifier without prior knowledge. The circles in each ellipses indicate the sampled states for training.}
\label{fig:entangle_schematic}
\end{figure*}
% \subsection{Training a generic witness via SVM}
In this paper, we focus on solving the \nameref{prm:entanglement_detection} problem with training data.
In other words, we derive the entanglement witness (classifier) for certain target states with desired entanglement structure by fitting a synthetic dataset.
\begin{problem}[learning an entanglement witness]\label{prm:learn_witness}
% It aims to learn a witness for \nameref{prm:entanglement_detection} of the target entangled state $\ket{\psi_{\target}}$ from a synthetic dataset
% \hfill
\;
\begin{itemize}
\item \textbf{Input}: a dataset $\qty{\qty(\dm^{(i)},y^{(i)})}$ consist of entangled states $\dm$ around $\ket{\psi_{\target}}$ with label $y=-1$ and randomly sampled bi-separable states with label $1$.
\item \textbf{Output}: a classifier $f(\vbx_{\dm,\tilde{\bmsigma}})$ with high training accuracy where $\tilde{\bmsigma}$ is a subset of all Pauli observables and $\vbx_{\dm,\tilde{\bmsigma}}$ is a vector of corresponding expectation values.
% (minimal)
\end{itemize}
\end{problem}
% \begin{problem}[detect graph state entanglement structure?]
% problem with/without training data
% \begin{itemize}
% \item \textbf{Input}: a graph $\graph$ encoding in a graph state $\ket{\graph}$;
% adjacency matrix $A$?
% \item \textbf{Output}: entanglement structure: \textsf{\nameref{def:gme}}??
% \end{itemize}
% % with training data:
% \end{problem}
% \begin{itemize}
% \item \textbf{features}: classical shadow?
% \item label:
% \end{itemize}
% \subsubsection{Variational quantum kernel estimation (hybrid)}
% \subsection{Variational (hybrid) quantum algorithms}
% \subsubsection{Related works}
% \begin{itemize}
% \item
% (feature: synthetic density matrix with noise flatten as a real vector $\vbx\in\realnumber^{d_A^2d_B^2-1}$).