@@ -49,6 +49,9 @@ public function __toArray()
4949
5050 /**
5151 * Parse the output of dump_data into something usable.
52+ *
53+ * The expected string looks similar to this:
54+ *
5255 * InfoBegin
5356 * InfoKey: Creator
5457 * InfoValue: Adobe Acrobat Pro DC 15.0
@@ -62,67 +65,74 @@ public function __toArray()
6265 * BookmarkTitle: First bookmark
6366 * BookmarkLevel: 1
6467 * BookmarkPageNumber: 1
68+ * BookmarkBegin
69+ * BookmarkTitle: Second bookmark
70+ * BookmarkLevel: 1
71+ * BookmarkPageNumber: 2
6572 *
6673 * @param $dataString
6774 * @return array
6875 */
private function parseData($dataString)
{
    // Converts the line-based dump_data text into a nested array:
    //  - every "<Name>Begin" line opens a block; the following
    //    "<Name><Field>: <value>" lines become one array of
    //    Field => value, appended to $output[<Name>];
    //  - any other "Key: Value" line becomes $output[Key] = Value;
    //  - the 'Info' blocks (Key/Value pairs) are finally flattened into
    //    a single Key => Value map.
    $output = array();
    $group = null;        // Name of the block being read (Info, Bookmark, PageMedia, ...)
    $groupData = array(); // Fields collected so far for that block

    // Split on every newline convention instead of PHP_EOL: PHP_EOL describes
    // the platform PHP runs on, not the line endings found in $dataString.
    // An explicit alternation is used rather than \R because, without the
    // "u" modifier, \R also matches byte 0x85, which occurs inside UTF-8
    // encoded text.
    foreach (preg_split('/\r\n|\r|\n/', $dataString) as $line) {
        $trimmedLine = trim($line);

        // "<Name>Begin" starts a new block.
        if (preg_match('/^(\w+)Begin$/', $trimmedLine, $matches)) {
            // The previous block - if any - ends here, so store it.
            if ($group !== null && !empty($groupData)) {
                $output[$group][] = $groupData;
            }
            $group = $matches[1];
            if (!isset($output[$group])) {
                $output[$group] = array();
            }
            $groupData = array();
            continue;
        }

        if ($group !== null) {
            // "<Name><Field>: <value>" belongs to the block being read.
            $fieldPattern = '/^' . preg_quote($group, '/') . '(\w+): ?(.*)$/';
            if (preg_match($fieldPattern, $trimmedLine, $matches)) {
                $groupData[$matches[1]] = $matches[2];
                continue;
            }

            // Any other line ends the block; the line itself is still
            // examined below as a plain "Key: Value" entry.
            if (!empty($groupData)) {
                $output[$group][] = $groupData;
                $groupData = array();
            }
            $group = null;
        }

        // Plain "Key: Value" line outside of any block.
        if (preg_match('/([^:]*): ?(.*)/', $trimmedLine, $matches)) {
            $output[$matches[1]] = $matches[2];
        }
    }

    // The last block is still pending when the input ends inside it.
    if ($group !== null && !empty($groupData)) {
        $output[$group][] = $groupData;
    }

    // The Info group is a list of ['Key' => 'x', 'Value' => 'y'] entries;
    // convert it to ['x' => 'y', ...]. Entries missing either part are dropped.
    if (isset($output['Info'])) {
        $data = array();
        foreach ($output['Info'] as $infoGroup) {
            if (isset($infoGroup['Key'], $infoGroup['Value'])) {
                $data[$infoGroup['Key']] = $infoGroup['Value'];
            }
        }
        $output['Info'] = $data;
    }

    return $output;
}
0 commit comments