001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.linear;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.Field;
023    import org.apache.commons.math.FieldElement;
024    import org.apache.commons.math.MathRuntimeException;
025    
026    /**
027     * Cache-friendly implementation of FieldMatrix using a flat arrays to store
028     * square blocks of the matrix.
029     * <p>
030     * This implementation is specially designed to be cache-friendly. Square blocks are
031     * stored as small arrays and allow efficient traversal of data both in row major direction
032     * and columns major direction, one block at a time. This greatly increases performances
033     * for algorithms that use crossed directions loops like multiplication or transposition.
034     * </p>
035     * <p>
036     * The size of square blocks is a static parameter. It may be tuned according to the cache
037     * size of the target computer processor. As a rule of thumbs, it should be the largest
038     * value that allows three blocks to be simultaneously cached (this is necessary for example
039     * for matrix multiplication). The default value is to use 36x36 blocks.
040     * </p>
041     * <p>
042     * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
043     * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
044     * blocks are flattened in row major order in single dimension arrays which are therefore
045     * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
046     * organized in row major order.
047     * </p>
048     * <p>
049     * As an example, for a block size of 36x36, a 100x60 matrix would be stored in 6 blocks.
050     * Block 0 would be a Field[1296] array holding the upper left 36x36 square, block 1 would be
051     * a Field[1296] array holding the upper center 36x36 square, block 2 would be a Field[1008]
052     * array holding the upper right 36x28 rectangle, block 3 would be a Field[864] array holding
053     * the lower left 24x36 rectangle, block 4 would be a Field[864] array holding the lower center
054     * 24x36 rectangle and block 5 would be a Field[672] array holding the lower right 24x28
055     * rectangle.
056     * </p>
057     * <p>
058     * The layout complexity overhead versus simple mapping of matrices to java
059     * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
060     * to up to 3-fold improvements for matrices of moderate to large size.
061     * </p>
062     * @param <T> the type of the field elements
063     * @version $Revision: 783741 $ $Date: 2009-06-11 08:35:42 -0400 (Thu, 11 Jun 2009) $
064     * @since 2.0
065     */
066    public class BlockFieldMatrix<T extends FieldElement<T>> extends AbstractFieldMatrix<T> implements Serializable {
067        
068        /** Serializable version identifier */
069        private static final long serialVersionUID = -4602336630143123183L;
070    
071        /** Block size. */
072        public static final int BLOCK_SIZE = 36;
073    
074        /** Blocks of matrix entries. */
075        private final T blocks[][];
076    
077        /** Number of rows of the matrix. */
078        private final int rows;
079    
080        /** Number of columns of the matrix. */
081        private final int columns;
082    
083        /** Number of block rows of the matrix. */
084        private final int blockRows;
085    
086        /** Number of block columns of the matrix. */
087        private final int blockColumns;
088    
089        /**
090         * Create a new matrix with the supplied row and column dimensions.
091         *
092         * @param field field to which the elements belong
093         * @param rows  the number of rows in the new matrix
094         * @param columns  the number of columns in the new matrix
095         * @throws IllegalArgumentException if row or column dimension is not
096         *  positive
097         */
098        public BlockFieldMatrix(final Field<T> field, final int rows, final int columns)
099            throws IllegalArgumentException {
100    
101            super(field, rows, columns);
102            this.rows    = rows;
103            this.columns = columns;
104    
105            // number of blocks
106            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
107            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
108    
109            // allocate storage blocks, taking care of smaller ones at right and bottom
110            blocks = createBlocksLayout(field, rows, columns);
111    
112        }
113    
114        /**
115         * Create a new dense matrix copying entries from raw layout data.
116         * <p>The input array <em>must</em> already be in raw layout.</p>
117         * <p>Calling this constructor is equivalent to call:
118         * <pre>matrix = new BlockFieldMatrix<T>(getField(), rawData.length, rawData[0].length,
119         *                                   toBlocksLayout(rawData), false);</pre>
120         * </p>
121         * @param rawData data for new matrix, in raw layout
122         *
123         * @exception IllegalArgumentException if <code>blockData</code> shape is
124         * inconsistent with block layout
125         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
126         */
127        public BlockFieldMatrix(final T[][] rawData)
128            throws IllegalArgumentException {
129            this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
130        }
131    
132        /**
133         * Create a new dense matrix copying entries from block layout data.
134         * <p>The input array <em>must</em> already be in blocks layout.</p>
135         * @param rows  the number of rows in the new matrix
136         * @param columns  the number of columns in the new matrix
137         * @param blockData data for new matrix
138         * @param copyArray if true, the input array will be copied, otherwise
139         * it will be referenced
140         *
141         * @exception IllegalArgumentException if <code>blockData</code> shape is
142         * inconsistent with block layout
143         * @see #createBlocksLayout(Field, int, int)
144         * @see #toBlocksLayout(FieldElement[][])
145         * @see #BlockFieldMatrix(FieldElement[][])
146         */
147        public BlockFieldMatrix(final int rows, final int columns,
148                                final T[][] blockData, final boolean copyArray)
149            throws IllegalArgumentException {
150    
151            super(extractField(blockData), rows, columns);
152            this.rows    = rows;
153            this.columns = columns;
154    
155            // number of blocks
156            blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
157            blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
158    
159            if (copyArray) {
160                // allocate storage blocks, taking care of smaller ones at right and bottom
161                blocks = buildArray(getField(), blockRows * blockColumns, -1);
162            } else {
163                // reference existing array
164                blocks = blockData;
165            }
166    
167            int index = 0;
168            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
169                final int iHeight = blockHeight(iBlock);
170                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
171                    if (blockData[index].length != iHeight * blockWidth(jBlock)) {
172                        throw MathRuntimeException.createIllegalArgumentException(
173                                "wrong array shape (block length = {0}, expected {1})",
174                                blockData[index].length, iHeight * blockWidth(jBlock));
175                    }
176                    if (copyArray) {
177                        blocks[index] = blockData[index].clone();
178                    }
179                }
180            }
181    
182        }
183    
184        /**
185         * Convert a data array from raw layout to blocks layout.
186         * <p>
187         * Raw layout is the straightforward layout where element at row i and
188         * column j is in array element <code>rawData[i][j]</code>. Blocks layout
189         * is the layout used in {@link BlockFieldMatrix} instances, where the matrix
190         * is split in square blocks (except at right and bottom side where blocks may
191         * be rectangular to fit matrix size) and each block is stored in a flattened
192         * one-dimensional array.
193         * </p>
194         * <p>
195         * This method creates an array in blocks layout from an input array in raw layout.
196         * It can be used to provide the array argument of the {@link
197         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
198         * constructor.
199         * </p>
200         * @param <T> the type of the field elements
201         * @param rawData data array in raw layout
202         * @return a new data array containing the same entries but in blocks layout
203         * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
204         *  (not all rows have the same length)
205         * @see #createBlocksLayout(Field, int, int)
206         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
207         */
208        public static <T extends FieldElement<T>> T[][] toBlocksLayout(final T[][] rawData)
209            throws IllegalArgumentException {
210    
211            final int rows         = rawData.length;
212            final int columns      = rawData[0].length;
213            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
214            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
215    
216            // safety checks
217            for (int i = 0; i < rawData.length; ++i) {
218                final int length = rawData[i].length;
219                if (length != columns) {
220                    throw MathRuntimeException.createIllegalArgumentException(
221                            "some rows have length {0} while others have length {1}",
222                            columns, length); 
223                }
224            }
225    
226            // convert array
227            final Field<T> field = extractField(rawData);
228            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
229            for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
230                final int pStart  = iBlock * BLOCK_SIZE;
231                final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
232                final int iHeight = pEnd - pStart;
233                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
234                    final int qStart = jBlock * BLOCK_SIZE;
235                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
236                    final int jWidth = qEnd - qStart;
237    
238                    // allocate new block
239                    final T[] block = buildArray(field, iHeight * jWidth);
240                    blocks[blockIndex] = block;
241    
242                    // copy data
243                    for (int p = pStart, index = 0; p < pEnd; ++p, index += jWidth) {
244                        System.arraycopy(rawData[p], qStart, block, index, jWidth);
245                    }
246    
247                }
248            }
249    
250            return blocks;
251    
252        }
253    
254        /**
255         * Create a data array in blocks layout.
256         * <p>
257         * This method can be used to create the array argument of the {@link
258         * #BlockFieldMatrix(int, int, FieldElement[][], boolean)}
259         * constructor.
260         * </p>
261         * @param <T> the type of the field elements
262         * @param field field to which the elements belong
263         * @param rows  the number of rows in the new matrix
264         * @param columns  the number of columns in the new matrix
265         * @return a new data array in blocks layout
266         * @see #toBlocksLayout(FieldElement[][])
267         * @see #BlockFieldMatrix(int, int, FieldElement[][], boolean)
268         */
269        public static <T extends FieldElement<T>> T[][] createBlocksLayout(final Field<T> field,
270                                                                           final int rows, final int columns) {
271    
272            final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
273            final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
274    
275            final T[][] blocks = buildArray(field, blockRows * blockColumns, -1);
276            for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
277                final int pStart  = iBlock * BLOCK_SIZE;
278                final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
279                final int iHeight = pEnd - pStart;
280                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
281                    final int qStart = jBlock * BLOCK_SIZE;
282                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
283                    final int jWidth = qEnd - qStart;
284                    blocks[blockIndex] = buildArray(field, iHeight * jWidth);
285                }
286            }
287    
288            return blocks;
289    
290        }
291    
292        /** {@inheritDoc} */
293        @Override
294        public FieldMatrix<T> createMatrix(final int rowDimension, final int columnDimension)
295            throws IllegalArgumentException {
296            return new BlockFieldMatrix<T>(getField(), rowDimension, columnDimension);
297        }
298    
299        /** {@inheritDoc} */
300        @Override
301        public FieldMatrix<T> copy() {
302    
303            // create an empty matrix
304            BlockFieldMatrix<T> copied = new BlockFieldMatrix<T>(getField(), rows, columns);
305    
306            // copy the blocks
307            for (int i = 0; i < blocks.length; ++i) {
308                System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
309            }
310    
311            return copied;
312    
313        }
314    
315        /** {@inheritDoc} */
316        @Override
317        public FieldMatrix<T> add(final FieldMatrix<T> m)
318            throws IllegalArgumentException {
319            try {
320                return add((BlockFieldMatrix<T>) m);
321            } catch (ClassCastException cce) {
322    
323                // safety check
324                checkAdditionCompatible(m);
325    
326                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
327    
328                // perform addition block-wise, to ensure good cache behavior
329                int blockIndex = 0;
330                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
331                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
332    
333                        // perform addition on the current block
334                        final T[] outBlock = out.blocks[blockIndex];
335                        final T[] tBlock   = blocks[blockIndex];
336                        final int      pStart   = iBlock * BLOCK_SIZE;
337                        final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
338                        final int      qStart   = jBlock * BLOCK_SIZE;
339                        final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
340                        for (int p = pStart, k = 0; p < pEnd; ++p) {
341                            for (int q = qStart; q < qEnd; ++q, ++k) {
342                                outBlock[k] = tBlock[k].add(m.getEntry(p, q));
343                            }
344                        }
345    
346                        // go to next block
347                        ++blockIndex;
348    
349                    }
350                }
351    
352                return out;
353    
354            }
355        }
356    
357        /**
358         * Compute the sum of this and <code>m</code>.
359         *
360         * @param m    matrix to be added
361         * @return     this + m
362         * @throws  IllegalArgumentException if m is not the same size as this
363         */
364        public BlockFieldMatrix<T> add(final BlockFieldMatrix<T> m)
365            throws IllegalArgumentException {
366    
367            // safety check
368            checkAdditionCompatible(m);
369    
370            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
371    
372            // perform addition block-wise, to ensure good cache behavior
373            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
374                final T[] outBlock = out.blocks[blockIndex];
375                final T[] tBlock   = blocks[blockIndex];
376                final T[] mBlock   = m.blocks[blockIndex];
377                for (int k = 0; k < outBlock.length; ++k) {
378                    outBlock[k] = tBlock[k].add(mBlock[k]);
379                }
380            }
381    
382            return out;
383    
384        }
385    
386        /** {@inheritDoc} */
387        @Override
388        public FieldMatrix<T> subtract(final FieldMatrix<T> m)
389            throws IllegalArgumentException {
390            try {
391                return subtract((BlockFieldMatrix<T>) m);
392            } catch (ClassCastException cce) {
393    
394                // safety check
395                checkSubtractionCompatible(m);
396    
397                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
398    
399                // perform subtraction block-wise, to ensure good cache behavior
400                int blockIndex = 0;
401                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
402                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
403    
404                        // perform subtraction on the current block
405                        final T[] outBlock = out.blocks[blockIndex];
406                        final T[] tBlock   = blocks[blockIndex];
407                        final int      pStart   = iBlock * BLOCK_SIZE;
408                        final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
409                        final int      qStart   = jBlock * BLOCK_SIZE;
410                        final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
411                        for (int p = pStart, k = 0; p < pEnd; ++p) {
412                            for (int q = qStart; q < qEnd; ++q, ++k) {
413                                outBlock[k] = tBlock[k].subtract(m.getEntry(p, q));
414                            }
415                        }
416    
417                        // go to next block
418                        ++blockIndex;
419    
420                    }
421                }
422    
423                return out;
424    
425            }
426        }
427    
428        /**
429         * Compute this minus <code>m</code>.
430         *
431         * @param m    matrix to be subtracted
432         * @return     this - m
433         * @throws  IllegalArgumentException if m is not the same size as this
434         */
435        public BlockFieldMatrix<T> subtract(final BlockFieldMatrix<T> m)
436            throws IllegalArgumentException {
437    
438            // safety check
439            checkSubtractionCompatible(m);
440    
441            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
442    
443            // perform subtraction block-wise, to ensure good cache behavior
444            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
445                final T[] outBlock = out.blocks[blockIndex];
446                final T[] tBlock   = blocks[blockIndex];
447                final T[] mBlock   = m.blocks[blockIndex];
448                for (int k = 0; k < outBlock.length; ++k) {
449                    outBlock[k] = tBlock[k].subtract(mBlock[k]);
450                }
451            }
452    
453            return out;
454    
455        }
456    
457        /** {@inheritDoc} */
458        @Override
459        public FieldMatrix<T> scalarAdd(final T d)
460            throws IllegalArgumentException {
461    
462            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
463    
464            // perform subtraction block-wise, to ensure good cache behavior
465            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
466                final T[] outBlock = out.blocks[blockIndex];
467                final T[] tBlock   = blocks[blockIndex];
468                for (int k = 0; k < outBlock.length; ++k) {
469                    outBlock[k] = tBlock[k].add(d);
470                }
471            }
472    
473            return out;
474    
475        }
476    
477        /** {@inheritDoc} */
478        @Override
479        public FieldMatrix<T> scalarMultiply(final T d)
480            throws IllegalArgumentException {
481    
482            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, columns);
483    
484            // perform subtraction block-wise, to ensure good cache behavior
485            for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
486                final T[] outBlock = out.blocks[blockIndex];
487                final T[] tBlock   = blocks[blockIndex];
488                for (int k = 0; k < outBlock.length; ++k) {
489                    outBlock[k] = tBlock[k].multiply(d);
490                }
491            }
492    
493            return out;
494    
495        }
496    
497        /** {@inheritDoc} */
498        @Override
499        public FieldMatrix<T> multiply(final FieldMatrix<T> m)
500            throws IllegalArgumentException {
501            try {
502                return multiply((BlockFieldMatrix<T>) m);
503            } catch (ClassCastException cce) {
504    
505                // safety check
506                checkMultiplicationCompatible(m);
507    
508                final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.getColumnDimension());
509                final T zero = getField().getZero();
510    
511                // perform multiplication block-wise, to ensure good cache behavior
512                int blockIndex = 0;
513                for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
514    
515                    final int pStart = iBlock * BLOCK_SIZE;
516                    final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
517    
518                    for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
519    
520                        final int qStart = jBlock * BLOCK_SIZE;
521                        final int qEnd   = Math.min(qStart + BLOCK_SIZE, m.getColumnDimension());
522    
523                        // select current block
524                        final T[] outBlock = out.blocks[blockIndex];
525    
526                        // perform multiplication on current block
527                        for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
528                            final int kWidth      = blockWidth(kBlock);
529                            final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
530                            final int rStart      = kBlock * BLOCK_SIZE;
531                            for (int p = pStart, k = 0; p < pEnd; ++p) {
532                                final int lStart = (p - pStart) * kWidth;
533                                final int lEnd   = lStart + kWidth;
534                                for (int q = qStart; q < qEnd; ++q) {
535                                    T sum = zero;
536                                    for (int l = lStart, r = rStart; l < lEnd; ++l, ++r) {
537                                        sum = sum.add(tBlock[l].multiply(m.getEntry(r, q)));
538                                    }
539                                    outBlock[k] = outBlock[k].add(sum);
540                                    ++k;
541                                }
542                            }
543                        }
544    
545                        // go to next block
546                        ++blockIndex;
547    
548                    }
549                }
550    
551                return out;
552    
553            }
554        }
555    
556        /**
557         * Returns the result of postmultiplying this by m.
558         *
559         * @param m    matrix to postmultiply by
560         * @return     this * m
561         * @throws     IllegalArgumentException
562         *             if columnDimension(this) != rowDimension(m)
563         */
564        public BlockFieldMatrix<T> multiply(BlockFieldMatrix<T> m) throws IllegalArgumentException {
565    
566            // safety check
567            checkMultiplicationCompatible(m);
568    
569            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, m.columns);
570            final T zero = getField().getZero();
571    
572            // perform multiplication block-wise, to ensure good cache behavior
573            int blockIndex = 0;
574            for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
575    
576                final int pStart = iBlock * BLOCK_SIZE;
577                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
578    
579                for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
580                    final int jWidth = out.blockWidth(jBlock);
581                    final int jWidth2 = jWidth  + jWidth;
582                    final int jWidth3 = jWidth2 + jWidth;
583                    final int jWidth4 = jWidth3 + jWidth;
584    
585                    // select current block
586                    final T[] outBlock = out.blocks[blockIndex];
587    
588                    // perform multiplication on current block
589                    for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
590                        final int kWidth = blockWidth(kBlock);
591                        final T[] tBlock = blocks[iBlock * blockColumns + kBlock];
592                        final T[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
593                        for (int p = pStart, k = 0; p < pEnd; ++p) {
594                            final int lStart = (p - pStart) * kWidth;
595                            final int lEnd   = lStart + kWidth;
596                            for (int nStart = 0; nStart < jWidth; ++nStart) {
597                                T sum = zero;
598                                int l = lStart;
599                                int n = nStart;
600                                while (l < lEnd - 3) {
601                                    sum = sum.
602                                          add(tBlock[l].multiply(mBlock[n])).
603                                          add(tBlock[l + 1].multiply(mBlock[n + jWidth])).
604                                          add(tBlock[l + 2].multiply(mBlock[n + jWidth2])).
605                                          add(tBlock[l + 3].multiply(mBlock[n + jWidth3]));
606                                    l += 4;
607                                    n += jWidth4;
608                                }
609                                while (l < lEnd) {
610                                    sum = sum.add(tBlock[l++].multiply(mBlock[n]));
611                                    n += jWidth;
612                                }
613                                outBlock[k] = outBlock[k].add(sum);
614                                ++k;
615                            }
616                        }
617                    }
618    
619                    // go to next block
620                    ++blockIndex;
621    
622                }
623            }
624    
625            return out;
626    
627        }
628    
629        /** {@inheritDoc} */
630        @Override
631        public T[][] getData() {
632    
633            final T[][] data = buildArray(getField(), getRowDimension(), getColumnDimension());
634            final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
635    
636            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
637                final int pStart = iBlock * BLOCK_SIZE;
638                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
639                int regularPos   = 0;
640                int lastPos      = 0;
641                for (int p = pStart; p < pEnd; ++p) {
642                    final T[] dataP = data[p];
643                    int blockIndex = iBlock * blockColumns;
644                    int dataPos    = 0;
645                    for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
646                        System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
647                        dataPos += BLOCK_SIZE;
648                    }
649                    System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
650                    regularPos += BLOCK_SIZE;
651                    lastPos    += lastColumns;
652                }
653            }
654    
655            return data;
656            
657        }
658    
659        /** {@inheritDoc} */
660        @Override
661        public FieldMatrix<T> getSubMatrix(final int startRow, final int endRow,
662                                       final int startColumn, final int endColumn)
663            throws MatrixIndexException {
664    
665            // safety checks
666            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
667    
668            // create the output matrix
669            final BlockFieldMatrix<T> out =
670                new BlockFieldMatrix<T>(getField(), endRow - startRow + 1, endColumn - startColumn + 1);
671    
672            // compute blocks shifts
673            final int blockStartRow    = startRow    / BLOCK_SIZE;
674            final int rowsShift        = startRow    % BLOCK_SIZE;
675            final int blockStartColumn = startColumn / BLOCK_SIZE;
676            final int columnsShift     = startColumn % BLOCK_SIZE;
677    
678            // perform extraction block-wise, to ensure good cache behavior
679            for (int iBlock = 0, pBlock = blockStartRow; iBlock < out.blockRows; ++iBlock, ++pBlock) {
680                final int iHeight = out.blockHeight(iBlock);
681                for (int jBlock = 0, qBlock = blockStartColumn; jBlock < out.blockColumns; ++jBlock, ++qBlock) {
682                    final int jWidth = out.blockWidth(jBlock);
683    
684                    // handle one block of the output matrix
685                    final int      outIndex = iBlock * out.blockColumns + jBlock;
686                    final T[] outBlock = out.blocks[outIndex];
687                    final int      index    = pBlock * blockColumns + qBlock;
688                    final int      width    = blockWidth(qBlock);
689    
690                    final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
691                    final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
692                    if (heightExcess > 0) {
693                        // the submatrix block spans on two blocks rows from the original matrix
694                        if (widthExcess > 0) {
695                            // the submatrix block spans on two blocks columns from the original matrix
696                            final int width2 = blockWidth(qBlock + 1);
697                            copyBlockPart(blocks[index], width,
698                                          rowsShift, BLOCK_SIZE,
699                                          columnsShift, BLOCK_SIZE,
700                                          outBlock, jWidth, 0, 0);
701                            copyBlockPart(blocks[index + 1], width2,
702                                          rowsShift, BLOCK_SIZE,
703                                          0, widthExcess,
704                                          outBlock, jWidth, 0, jWidth - widthExcess);
705                            copyBlockPart(blocks[index + blockColumns], width,
706                                          0, heightExcess,
707                                          columnsShift, BLOCK_SIZE,
708                                          outBlock, jWidth, iHeight - heightExcess, 0);
709                            copyBlockPart(blocks[index + blockColumns + 1], width2,
710                                          0, heightExcess,
711                                          0, widthExcess,
712                                          outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
713                        } else {
714                            // the submatrix block spans on one block column from the original matrix
715                            copyBlockPart(blocks[index], width,
716                                          rowsShift, BLOCK_SIZE,
717                                          columnsShift, jWidth + columnsShift,
718                                          outBlock, jWidth, 0, 0);
719                            copyBlockPart(blocks[index + blockColumns], width,
720                                          0, heightExcess,
721                                          columnsShift, jWidth + columnsShift,
722                                          outBlock, jWidth, iHeight - heightExcess, 0);
723                        }
724                    } else {
725                        // the submatrix block spans on one block row from the original matrix
726                        if (widthExcess > 0) {
727                            // the submatrix block spans on two blocks columns from the original matrix
728                            final int width2 = blockWidth(qBlock + 1);
729                            copyBlockPart(blocks[index], width,
730                                          rowsShift, iHeight + rowsShift,
731                                          columnsShift, BLOCK_SIZE,
732                                          outBlock, jWidth, 0, 0);
733                            copyBlockPart(blocks[index + 1], width2,
734                                          rowsShift, iHeight + rowsShift,
735                                          0, widthExcess,
736                                          outBlock, jWidth, 0, jWidth - widthExcess);
737                        } else {
738                            // the submatrix block spans on one block column from the original matrix
739                            copyBlockPart(blocks[index], width,
740                                          rowsShift, iHeight + rowsShift,
741                                          columnsShift, jWidth + columnsShift,
742                                          outBlock, jWidth, 0, 0);
743                        }
744                   }
745    
746                }
747            }
748    
749            return out;
750    
751        }
752    
753        /**
754         * Copy a part of a block into another one
755         * <p>This method can be called only when the specified part fits in both
756         * blocks, no verification is done here.</p>
757         * @param srcBlock source block
758         * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
759         * @param srcStartRow start row in the source block
760         * @param srcEndRow end row (exclusive) in the source block
761         * @param srcStartColumn start column in the source block
762         * @param srcEndColumn end column (exclusive) in the source block
763         * @param dstBlock destination block
764         * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
765         * @param dstStartRow start row in the destination block
766         * @param dstStartColumn start column in the destination block
767         */
768        private void copyBlockPart(final T[] srcBlock, final int srcWidth,
769                                   final int srcStartRow, final int srcEndRow,
770                                   final int srcStartColumn, final int srcEndColumn,
771                                   final T[] dstBlock, final int dstWidth,
772                                   final int dstStartRow, final int dstStartColumn) {
773            final int length = srcEndColumn - srcStartColumn;
774            int srcPos = srcStartRow * srcWidth + srcStartColumn;
775            int dstPos = dstStartRow * dstWidth + dstStartColumn;
776            for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
777                System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
778                srcPos += srcWidth;
779                dstPos += dstWidth;
780            }
781        }
782    
783        /** {@inheritDoc} */
784        @Override
785        public void setSubMatrix(final T[][] subMatrix, final int row, final int column)
786            throws MatrixIndexException {
787    
788            // safety checks
789            final int refLength = subMatrix[0].length;
790            if (refLength < 1) {
791                throw MathRuntimeException.createIllegalArgumentException("matrix must have at least one column");             
792            }
793            final int endRow    = row + subMatrix.length - 1;
794            final int endColumn = column + refLength - 1;
795            checkSubMatrixIndex(row, endRow, column, endColumn);
796            for (final T[] subRow : subMatrix) {
797                if (subRow.length != refLength) {
798                    throw MathRuntimeException.createIllegalArgumentException(
799                            "some rows have length {0} while others have length {1}",
800                            refLength, subRow.length); 
801                }
802            }
803    
804            // compute blocks bounds
805            final int blockStartRow    = row / BLOCK_SIZE;
806            final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
807            final int blockStartColumn = column / BLOCK_SIZE;
808            final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
809    
810            // perform copy block-wise, to ensure good cache behavior
811            for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
812                final int iHeight  = blockHeight(iBlock);
813                final int firstRow = iBlock * BLOCK_SIZE;
814                final int iStart   = Math.max(row,    firstRow);
815                final int iEnd     = Math.min(endRow + 1, firstRow + iHeight);
816    
817                for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
818                    final int jWidth      = blockWidth(jBlock);
819                    final int firstColumn = jBlock * BLOCK_SIZE;
820                    final int jStart      = Math.max(column,    firstColumn);
821                    final int jEnd        = Math.min(endColumn + 1, firstColumn + jWidth);
822                    final int jLength     = jEnd - jStart;
823    
824                    // handle one block, row by row
825                    final T[] block = blocks[iBlock * blockColumns + jBlock];
826                    for (int i = iStart; i < iEnd; ++i) {
827                        System.arraycopy(subMatrix[i - row], jStart - column,
828                                         block, (i - firstRow) * jWidth + (jStart - firstColumn),
829                                         jLength);
830                    }
831    
832                }
833            }
834        }
835    
836        /** {@inheritDoc} */
837        @Override
838        public FieldMatrix<T> getRowMatrix(final int row)
839            throws MatrixIndexException {
840    
841            checkRowIndex(row);
842            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), 1, columns);
843    
844            // perform copy block-wise, to ensure good cache behavior
845            final int iBlock  = row / BLOCK_SIZE;
846            final int iRow    = row - iBlock * BLOCK_SIZE;
847            int outBlockIndex = 0;
848            int outIndex      = 0;
849            T[] outBlock = out.blocks[outBlockIndex];
850            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
851                final int jWidth     = blockWidth(jBlock);
852                final T[] block = blocks[iBlock * blockColumns + jBlock];
853                final int available  = outBlock.length - outIndex;
854                if (jWidth > available) {
855                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
856                    outBlock = out.blocks[++outBlockIndex];
857                    System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
858                    outIndex = jWidth - available;
859                } else {
860                    System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
861                    outIndex += jWidth;
862                }
863            }
864    
865            return out;
866    
867        }
868    
869        /** {@inheritDoc} */
870        @Override
871        public void setRowMatrix(final int row, final FieldMatrix<T> matrix)
872            throws MatrixIndexException, InvalidMatrixException {
873            try {
874                setRowMatrix(row, (BlockFieldMatrix<T>) matrix);
875            } catch (ClassCastException cce) {
876                super.setRowMatrix(row, matrix);
877            }
878        }
879    
880        /**
881         * Sets the entries in row number <code>row</code>
882         * as a row matrix.  Row indices start at 0.
883         *
884         * @param row the row to be set
885         * @param matrix row matrix (must have one row and the same number of columns
886         * as the instance)
887         * @throws MatrixIndexException if the specified row index is invalid
888         * @throws InvalidMatrixException if the matrix dimensions do not match one
889         * instance row
890         */
891        public void setRowMatrix(final int row, final BlockFieldMatrix<T> matrix)
892            throws MatrixIndexException, InvalidMatrixException {
893    
894            checkRowIndex(row);
895            final int nCols = getColumnDimension();
896            if ((matrix.getRowDimension() != 1) ||
897                (matrix.getColumnDimension() != nCols)) {
898                throw new InvalidMatrixException(
899                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
900                        matrix.getRowDimension(), matrix.getColumnDimension(),
901                        1, nCols);
902            }
903    
904            // perform copy block-wise, to ensure good cache behavior
905            final int iBlock = row / BLOCK_SIZE;
906            final int iRow   = row - iBlock * BLOCK_SIZE;
907            int mBlockIndex  = 0;
908            int mIndex       = 0;
909            T[] mBlock  = matrix.blocks[mBlockIndex];
910            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
911                final int jWidth     = blockWidth(jBlock);
912                final T[] block = blocks[iBlock * blockColumns + jBlock];
913                final int available  = mBlock.length - mIndex;
914                if (jWidth > available) {
915                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
916                    mBlock = matrix.blocks[++mBlockIndex];
917                    System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
918                    mIndex = jWidth - available;
919                } else {
920                    System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
921                    mIndex += jWidth;
922               }
923            }
924    
925        }
926        
927        /** {@inheritDoc} */
928        @Override
929        public FieldMatrix<T> getColumnMatrix(final int column)
930            throws MatrixIndexException {
931    
932            checkColumnIndex(column);
933            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), rows, 1);
934    
935            // perform copy block-wise, to ensure good cache behavior
936            final int jBlock  = column / BLOCK_SIZE;
937            final int jColumn = column - jBlock * BLOCK_SIZE;
938            final int jWidth  = blockWidth(jBlock);
939            int outBlockIndex = 0;
940            int outIndex      = 0;
941            T[] outBlock = out.blocks[outBlockIndex];
942            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
943                final int iHeight = blockHeight(iBlock);
944                final T[] block = blocks[iBlock * blockColumns + jBlock];
945                for (int i = 0; i < iHeight; ++i) {
946                    if (outIndex >= outBlock.length) {
947                        outBlock = out.blocks[++outBlockIndex];
948                        outIndex = 0;
949                    }
950                    outBlock[outIndex++] = block[i * jWidth + jColumn];
951                }
952            }
953    
954            return out;
955    
956        }
957    
958        /** {@inheritDoc} */
959        @Override
960        public void setColumnMatrix(final int column, final FieldMatrix<T> matrix)
961            throws MatrixIndexException, InvalidMatrixException {
962            try {
963                setColumnMatrix(column, (BlockFieldMatrix<T>) matrix);
964            } catch (ClassCastException cce) {
965                super.setColumnMatrix(column, matrix);
966            }
967        }
968    
969        /**
970         * Sets the entries in column number <code>column</code>
971         * as a column matrix.  Column indices start at 0.
972         *
973         * @param column the column to be set
974         * @param matrix column matrix (must have one column and the same number of rows
975         * as the instance)
976         * @throws MatrixIndexException if the specified column index is invalid
977         * @throws InvalidMatrixException if the matrix dimensions do not match one
978         * instance column
979         */
980        void setColumnMatrix(final int column, final BlockFieldMatrix<T> matrix)
981            throws MatrixIndexException, InvalidMatrixException {
982    
983            checkColumnIndex(column);
984            final int nRows = getRowDimension();
985            if ((matrix.getRowDimension() != nRows) ||
986                (matrix.getColumnDimension() != 1)) {
987                throw new InvalidMatrixException(
988                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
989                        matrix.getRowDimension(), matrix.getColumnDimension(),
990                        nRows, 1);
991            }
992    
993            // perform copy block-wise, to ensure good cache behavior
994            final int jBlock  = column / BLOCK_SIZE;
995            final int jColumn = column - jBlock * BLOCK_SIZE;
996            final int jWidth  = blockWidth(jBlock);
997            int mBlockIndex = 0;
998            int mIndex      = 0;
999            T[] mBlock = matrix.blocks[mBlockIndex];
1000            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1001                final int iHeight = blockHeight(iBlock);
1002                final T[] block = blocks[iBlock * blockColumns + jBlock];
1003                for (int i = 0; i < iHeight; ++i) {
1004                    if (mIndex >= mBlock.length) {
1005                        mBlock = matrix.blocks[++mBlockIndex];
1006                        mIndex = 0;
1007                    }
1008                    block[i * jWidth + jColumn] = mBlock[mIndex++];
1009                }
1010            }
1011    
1012        }
1013    
1014        /** {@inheritDoc} */
1015        @Override
1016        public FieldVector<T> getRowVector(final int row)
1017            throws MatrixIndexException {
1018    
1019            checkRowIndex(row);
1020            final T[] outData = buildArray(getField(), columns);
1021    
1022            // perform copy block-wise, to ensure good cache behavior
1023            final int iBlock  = row / BLOCK_SIZE;
1024            final int iRow    = row - iBlock * BLOCK_SIZE;
1025            int outIndex      = 0;
1026            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1027                final int jWidth     = blockWidth(jBlock);
1028                final T[] block = blocks[iBlock * blockColumns + jBlock];
1029                System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1030                outIndex += jWidth;
1031            }
1032    
1033            return new ArrayFieldVector<T>(outData, false);
1034    
1035        }
1036    
1037        /** {@inheritDoc} */
1038        @Override
1039        public void setRowVector(final int row, final FieldVector<T> vector)
1040            throws MatrixIndexException, InvalidMatrixException {
1041            try {
1042                setRow(row, ((ArrayFieldVector<T>) vector).getDataRef());
1043            } catch (ClassCastException cce) {
1044                super.setRowVector(row, vector);
1045            }
1046        }
1047    
1048        /** {@inheritDoc} */
1049        @Override
1050        public FieldVector<T> getColumnVector(final int column)
1051            throws MatrixIndexException {
1052    
1053            checkColumnIndex(column);
1054            final T[] outData = buildArray(getField(), rows);
1055    
1056            // perform copy block-wise, to ensure good cache behavior
1057            final int jBlock  = column / BLOCK_SIZE;
1058            final int jColumn = column - jBlock * BLOCK_SIZE;
1059            final int jWidth  = blockWidth(jBlock);
1060            int outIndex      = 0;
1061            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1062                final int iHeight = blockHeight(iBlock);
1063                final T[] block = blocks[iBlock * blockColumns + jBlock];
1064                for (int i = 0; i < iHeight; ++i) {
1065                    outData[outIndex++] = block[i * jWidth + jColumn];
1066                }
1067            }
1068    
1069            return new ArrayFieldVector<T>(outData, false);
1070    
1071        }
1072    
1073        /** {@inheritDoc} */
1074        @Override
1075        public void setColumnVector(final int column, final FieldVector<T> vector)
1076            throws MatrixIndexException, InvalidMatrixException {
1077            try {
1078                setColumn(column, ((ArrayFieldVector<T>) vector).getDataRef());
1079            } catch (ClassCastException cce) {
1080                super.setColumnVector(column, vector);
1081            }
1082        }
1083    
1084        /** {@inheritDoc} */
1085        @Override
1086        public T[] getRow(final int row)
1087            throws MatrixIndexException {
1088    
1089            checkRowIndex(row);
1090            final T[] out = buildArray(getField(), columns);
1091    
1092            // perform copy block-wise, to ensure good cache behavior
1093            final int iBlock  = row / BLOCK_SIZE;
1094            final int iRow    = row - iBlock * BLOCK_SIZE;
1095            int outIndex      = 0;
1096            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1097                final int jWidth     = blockWidth(jBlock);
1098                final T[] block = blocks[iBlock * blockColumns + jBlock];
1099                System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1100                outIndex += jWidth;
1101            }
1102    
1103            return out;
1104    
1105        }
1106    
1107        /** {@inheritDoc} */
1108        @Override
1109        public void setRow(final int row, final T[] array)
1110            throws MatrixIndexException, InvalidMatrixException {
1111    
1112            checkRowIndex(row);
1113            final int nCols = getColumnDimension();
1114            if (array.length != nCols) {
1115                throw new InvalidMatrixException(
1116                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1117                        1, array.length, 1, nCols);
1118            }
1119    
1120            // perform copy block-wise, to ensure good cache behavior
1121            final int iBlock  = row / BLOCK_SIZE;
1122            final int iRow    = row - iBlock * BLOCK_SIZE;
1123            int outIndex      = 0;
1124            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1125                final int jWidth     = blockWidth(jBlock);
1126                final T[] block = blocks[iBlock * blockColumns + jBlock];
1127                System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1128                outIndex += jWidth;
1129            }
1130    
1131        }
1132    
1133        /** {@inheritDoc} */
1134        @Override
1135        public T[] getColumn(final int column)
1136            throws MatrixIndexException {
1137    
1138            checkColumnIndex(column);
1139            final T[] out = buildArray(getField(), rows);
1140    
1141            // perform copy block-wise, to ensure good cache behavior
1142            final int jBlock  = column / BLOCK_SIZE;
1143            final int jColumn = column - jBlock * BLOCK_SIZE;
1144            final int jWidth  = blockWidth(jBlock);
1145            int outIndex      = 0;
1146            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1147                final int iHeight = blockHeight(iBlock);
1148                final T[] block = blocks[iBlock * blockColumns + jBlock];
1149                for (int i = 0; i < iHeight; ++i) {
1150                    out[outIndex++] = block[i * jWidth + jColumn];
1151                }
1152            }
1153    
1154            return out;
1155    
1156        }
1157    
1158        /** {@inheritDoc} */
1159        @Override
1160        public void setColumn(final int column, final T[] array)
1161            throws MatrixIndexException, InvalidMatrixException {
1162    
1163            checkColumnIndex(column);
1164            final int nRows = getRowDimension();
1165            if (array.length != nRows) {
1166                throw new InvalidMatrixException(
1167                        "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1168                        array.length, 1, nRows, 1);
1169            }
1170    
1171            // perform copy block-wise, to ensure good cache behavior
1172            final int jBlock  = column / BLOCK_SIZE;
1173            final int jColumn = column - jBlock * BLOCK_SIZE;
1174            final int jWidth  = blockWidth(jBlock);
1175            int outIndex      = 0;
1176            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1177                final int iHeight = blockHeight(iBlock);
1178                final T[] block = blocks[iBlock * blockColumns + jBlock];
1179                for (int i = 0; i < iHeight; ++i) {
1180                    block[i * jWidth + jColumn] = array[outIndex++];
1181                }
1182            }
1183    
1184        }
1185    
1186        /** {@inheritDoc} */
1187        @Override
1188        public T getEntry(final int row, final int column)
1189            throws MatrixIndexException {
1190            try {
1191                final int iBlock = row    / BLOCK_SIZE;
1192                final int jBlock = column / BLOCK_SIZE;
1193                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1194                                   (column - jBlock * BLOCK_SIZE);
1195                return blocks[iBlock * blockColumns + jBlock][k];
1196            } catch (ArrayIndexOutOfBoundsException e) {
1197                throw new MatrixIndexException(
1198                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1199                        row, column, getRowDimension(), getColumnDimension());
1200            }
1201        }
1202    
1203        /** {@inheritDoc} */
1204        @Override
1205        public void setEntry(final int row, final int column, final T value)
1206            throws MatrixIndexException {
1207            try {
1208                final int iBlock = row    / BLOCK_SIZE;
1209                final int jBlock = column / BLOCK_SIZE;
1210                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1211                                   (column - jBlock * BLOCK_SIZE);
1212                blocks[iBlock * blockColumns + jBlock][k] = value;
1213            } catch (ArrayIndexOutOfBoundsException e) {
1214                throw new MatrixIndexException(
1215                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1216                        row, column, getRowDimension(), getColumnDimension());
1217            }
1218        }
1219    
1220        /** {@inheritDoc} */
1221        @Override
1222        public void addToEntry(final int row, final int column, final T increment)
1223            throws MatrixIndexException {
1224            try {
1225                final int iBlock = row    / BLOCK_SIZE;
1226                final int jBlock = column / BLOCK_SIZE;
1227                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1228                                   (column - jBlock * BLOCK_SIZE);
1229                final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1230                blockIJ[k] = blockIJ[k].add(increment);
1231            } catch (ArrayIndexOutOfBoundsException e) {
1232                throw new MatrixIndexException(
1233                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1234                        row, column, getRowDimension(), getColumnDimension());
1235            }
1236        }
1237    
1238        /** {@inheritDoc} */
1239        @Override
1240        public void multiplyEntry(final int row, final int column, final T factor)
1241            throws MatrixIndexException {
1242            try {
1243                final int iBlock = row    / BLOCK_SIZE;
1244                final int jBlock = column / BLOCK_SIZE;
1245                final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1246                                   (column - jBlock * BLOCK_SIZE);
1247                final T[] blockIJ = blocks[iBlock * blockColumns + jBlock];
1248                blockIJ[k] = blockIJ[k].multiply(factor);
1249            } catch (ArrayIndexOutOfBoundsException e) {
1250                throw new MatrixIndexException(
1251                        "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1252                        row, column, getRowDimension(), getColumnDimension());
1253            }
1254        }
1255    
1256        /** {@inheritDoc} */
1257        @Override
1258        public FieldMatrix<T> transpose() {
1259    
1260            final int nRows = getRowDimension();
1261            final int nCols = getColumnDimension();
1262            final BlockFieldMatrix<T> out = new BlockFieldMatrix<T>(getField(), nCols, nRows);
1263    
1264            // perform transpose block-wise, to ensure good cache behavior
1265            int blockIndex = 0;
1266            for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1267                for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1268    
1269                    // transpose current block
1270                    final T[] outBlock = out.blocks[blockIndex];
1271                    final T[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1272                    final int      pStart   = iBlock * BLOCK_SIZE;
1273                    final int      pEnd     = Math.min(pStart + BLOCK_SIZE, columns);
1274                    final int      qStart   = jBlock * BLOCK_SIZE;
1275                    final int      qEnd     = Math.min(qStart + BLOCK_SIZE, rows);
1276                    for (int p = pStart, k = 0; p < pEnd; ++p) {
1277                        final int lInc = pEnd - pStart;
1278                        for (int q = qStart, l = p - pStart; q < qEnd; ++q, l+= lInc) {
1279                            outBlock[k++] = tBlock[l];
1280                        }
1281                    }
1282    
1283                    // go to next block
1284                    ++blockIndex;
1285    
1286                }
1287            }
1288    
1289            return out;
1290    
1291        }
1292    
1293        /** {@inheritDoc} */
1294        @Override
1295        public int getRowDimension() {
1296            return rows;
1297        }
1298    
1299        /** {@inheritDoc} */
1300        @Override
1301        public int getColumnDimension() {
1302            return columns;
1303        }
1304    
1305        /** {@inheritDoc} */
1306        @Override
1307        public T[] operate(final T[] v)
1308            throws IllegalArgumentException {
1309    
1310            if (v.length != columns) {
1311                throw MathRuntimeException.createIllegalArgumentException(
1312                        "vector length mismatch: got {0} but expected {1}",
1313                        v.length, columns);
1314            }
1315            final T[] out = buildArray(getField(), rows);
1316            final T zero = getField().getZero();
1317    
1318            // perform multiplication block-wise, to ensure good cache behavior
1319            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1320                final int pStart = iBlock * BLOCK_SIZE;
1321                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1322                for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1323                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1324                    final int      qStart = jBlock * BLOCK_SIZE;
1325                    final int      qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1326                    for (int p = pStart, k = 0; p < pEnd; ++p) {
1327                        T sum = zero;
1328                        int q = qStart;
1329                        while (q < qEnd - 3) {
1330                            sum = sum.
1331                                  add(block[k].multiply(v[q])).
1332                                  add(block[k + 1].multiply(v[q + 1])).
1333                                  add(block[k + 2].multiply(v[q + 2])).
1334                                  add(block[k + 3].multiply(v[q + 3]));
1335                            k += 4;
1336                            q += 4;
1337                        }
1338                        while (q < qEnd) {
1339                            sum = sum.add(block[k++].multiply(v[q++]));
1340                        }
1341                        out[p] = out[p].add(sum);
1342                    }
1343                }
1344            }
1345    
1346            return out;
1347    
1348        }
1349    
1350        /** {@inheritDoc} */
1351        @Override
1352        public T[] preMultiply(final T[] v)
1353            throws IllegalArgumentException {
1354    
1355            if (v.length != rows) {
1356                throw MathRuntimeException.createIllegalArgumentException(
1357                        "vector length mismatch: got {0} but expected {1}",
1358                        v.length, rows);
1359            }
1360            final T[] out = buildArray(getField(), columns);
1361            final T zero = getField().getZero();
1362    
1363            // perform multiplication block-wise, to ensure good cache behavior
1364            for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1365                final int jWidth  = blockWidth(jBlock);
1366                final int jWidth2 = jWidth  + jWidth;
1367                final int jWidth3 = jWidth2 + jWidth;
1368                final int jWidth4 = jWidth3 + jWidth;
1369                final int qStart = jBlock * BLOCK_SIZE;
1370                final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1371                for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1372                    final T[] block  = blocks[iBlock * blockColumns + jBlock];
1373                    final int      pStart = iBlock * BLOCK_SIZE;
1374                    final int      pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1375                    for (int q = qStart; q < qEnd; ++q) {
1376                        int k = q - qStart;
1377                        T sum = zero;
1378                        int p = pStart;
1379                        while (p < pEnd - 3) {
1380                            sum = sum.
1381                                  add(block[k].multiply(v[p])).
1382                                  add(block[k + jWidth].multiply(v[p + 1])).
1383                                  add(block[k + jWidth2].multiply(v[p + 2])).
1384                                  add(block[k + jWidth3].multiply(v[p + 3]));
1385                            k += jWidth4;
1386                            p += 4;
1387                        }
1388                        while (p < pEnd) {
1389                            sum = sum.add(block[k].multiply(v[p++]));
1390                            k += jWidth;
1391                        }
1392                        out[q] = out[q].add(sum);
1393                    }
1394                }
1395            }
1396    
1397            return out;
1398    
1399        }
1400    
1401        /** {@inheritDoc} */
1402        @Override
1403        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor)
1404            throws MatrixVisitorException {
1405            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1406            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1407                final int pStart = iBlock * BLOCK_SIZE;
1408                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1409                for (int p = pStart; p < pEnd; ++p) {
1410                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1411                        final int jWidth = blockWidth(jBlock);
1412                        final int qStart = jBlock * BLOCK_SIZE;
1413                        final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1414                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1415                        for (int q = qStart, k = (p - pStart) * jWidth; q < qEnd; ++q, ++k) {
1416                            block[k] = visitor.visit(p, q, block[k]);
1417                        }
1418                    }
1419                 }
1420            }
1421            return visitor.end();
1422        }
1423    
1424        /** {@inheritDoc} */
1425        @Override
1426        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor)
1427            throws MatrixVisitorException {
1428            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1429            for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1430                final int pStart = iBlock * BLOCK_SIZE;
1431                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1432                for (int p = pStart; p < pEnd; ++p) {
1433                    for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1434                        final int jWidth = blockWidth(jBlock);
1435                        final int qStart = jBlock * BLOCK_SIZE;
1436                        final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1437                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1438                        for (int q = qStart, k = (p - pStart) * jWidth; q < qEnd; ++q, ++k) {
1439                            visitor.visit(p, q, block[k]);
1440                        }
1441                    }
1442                 }
1443            }
1444            return visitor.end();
1445        }
1446    
1447        /** {@inheritDoc} */
1448        @Override
1449        public T walkInRowOrder(final FieldMatrixChangingVisitor<T> visitor,
1450                                     final int startRow, final int endRow,
1451                                     final int startColumn, final int endColumn)
1452            throws MatrixIndexException, MatrixVisitorException {
1453            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1454            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1455            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1456                final int p0     = iBlock * BLOCK_SIZE;
1457                final int pStart = Math.max(startRow, p0);
1458                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1459                for (int p = pStart; p < pEnd; ++p) {
1460                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1461                        final int jWidth = blockWidth(jBlock);
1462                        final int q0     = jBlock * BLOCK_SIZE;
1463                        final int qStart = Math.max(startColumn, q0);
1464                        final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1465                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1466                        for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1467                            block[k] = visitor.visit(p, q, block[k]);
1468                        }
1469                    }
1470                 }
1471            }
1472            return visitor.end();
1473        }
1474    
1475        /** {@inheritDoc} */
1476        @Override
1477        public T walkInRowOrder(final FieldMatrixPreservingVisitor<T> visitor,
1478                                     final int startRow, final int endRow,
1479                                     final int startColumn, final int endColumn)
1480            throws MatrixIndexException, MatrixVisitorException {
1481            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1482            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1483            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1484                final int p0     = iBlock * BLOCK_SIZE;
1485                final int pStart = Math.max(startRow, p0);
1486                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1487                for (int p = pStart; p < pEnd; ++p) {
1488                    for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1489                        final int jWidth = blockWidth(jBlock);
1490                        final int q0     = jBlock * BLOCK_SIZE;
1491                        final int qStart = Math.max(startColumn, q0);
1492                        final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1493                        final T[] block = blocks[iBlock * blockColumns + jBlock];
1494                        for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1495                            visitor.visit(p, q, block[k]);
1496                        }
1497                    }
1498                 }
1499            }
1500            return visitor.end();
1501        }
1502    
1503        /** {@inheritDoc} */
1504        @Override
1505        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor)
1506            throws MatrixVisitorException {
1507            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1508            for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
1509                final int pStart = iBlock * BLOCK_SIZE;
1510                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1511                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
1512                    final int qStart = jBlock * BLOCK_SIZE;
1513                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1514                    final T[] block = blocks[blockIndex];
1515                    for (int p = pStart, k = 0; p < pEnd; ++p) {
1516                        for (int q = qStart; q < qEnd; ++q, ++k) {
1517                            block[k] = visitor.visit(p, q, block[k]);
1518                        }
1519                    }
1520                }
1521            }
1522            return visitor.end();
1523        }
1524    
1525        /** {@inheritDoc} */
1526        @Override
1527        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor)
1528            throws MatrixVisitorException {
1529            visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1530            for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
1531                final int pStart = iBlock * BLOCK_SIZE;
1532                final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1533                for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
1534                    final int qStart = jBlock * BLOCK_SIZE;
1535                    final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1536                    final T[] block = blocks[blockIndex];
1537                    for (int p = pStart, k = 0; p < pEnd; ++p) {
1538                        for (int q = qStart; q < qEnd; ++q, ++k) {
1539                            visitor.visit(p, q, block[k]);
1540                        }
1541                    }
1542                }
1543            }
1544            return visitor.end();
1545        }
1546    
1547        /** {@inheritDoc} */
1548        @Override
1549        public T walkInOptimizedOrder(final FieldMatrixChangingVisitor<T> visitor,
1550                                           final int startRow, final int endRow,
1551                                           final int startColumn, final int endColumn)
1552            throws MatrixIndexException, MatrixVisitorException {
1553            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1554            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1555            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1556                final int p0     = iBlock * BLOCK_SIZE;
1557                final int pStart = Math.max(startRow, p0);
1558                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1559                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1560                    final int jWidth = blockWidth(jBlock);
1561                    final int q0     = jBlock * BLOCK_SIZE;
1562                    final int qStart = Math.max(startColumn, q0);
1563                    final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1564                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1565                    for (int p = pStart; p < pEnd; ++p) {
1566                        for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1567                            block[k] = visitor.visit(p, q, block[k]);
1568                        }
1569                    }
1570                }
1571            }
1572            return visitor.end();
1573        }
1574    
1575        /** {@inheritDoc} */
1576        @Override
1577        public T walkInOptimizedOrder(final FieldMatrixPreservingVisitor<T> visitor,
1578                                           final int startRow, final int endRow,
1579                                           final int startColumn, final int endColumn)
1580            throws MatrixIndexException, MatrixVisitorException {
1581            checkSubMatrixIndex(startRow, endRow, startColumn, endColumn);
1582            visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1583            for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1584                final int p0     = iBlock * BLOCK_SIZE;
1585                final int pStart = Math.max(startRow, p0);
1586                final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1587                for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1588                    final int jWidth = blockWidth(jBlock);
1589                    final int q0     = jBlock * BLOCK_SIZE;
1590                    final int qStart = Math.max(startColumn, q0);
1591                    final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1592                    final T[] block = blocks[iBlock * blockColumns + jBlock];
1593                    for (int p = pStart; p < pEnd; ++p) {
1594                        for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1595                            visitor.visit(p, q, block[k]);
1596                        }
1597                    }
1598                }
1599            }
1600            return visitor.end();
1601        }
1602    
1603        /**
1604         * Get the height of a block.
1605         * @param blockRow row index (in block sense) of the block
1606         * @return height (number of rows) of the block
1607         */
1608        private int blockHeight(final int blockRow) {
1609            return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1610        }
1611    
1612        /**
1613         * Get the width of a block.
1614         * @param blockColumn column index (in block sense) of the block
1615         * @return width (number of columns) of the block
1616         */
1617        private int blockWidth(final int blockColumn) {
1618            return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1619        }
1620    
1621    }