View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.linear;
19  
20  import java.io.Serializable;
21  import java.util.Arrays;
22  
23  import org.apache.commons.math.MathRuntimeException;
24  
25  /**
26   * Cache-friendly implementation of RealMatrix using a flat arrays to store
27   * square blocks of the matrix.
28   * <p>
29   * This implementation is specially designed to be cache-friendly. Square blocks are
30   * stored as small arrays and allow efficient traversal of data both in row major direction
31   * and columns major direction, one block at a time. This greatly increases performances
32   * for algorithms that use crossed directions loops like multiplication or transposition.
33   * </p>
34   * <p>
35   * The size of square blocks is a static parameter. It may be tuned according to the cache
36   * size of the target computer processor. As a rule of thumbs, it should be the largest
37   * value that allows three blocks to be simultaneously cached (this is necessary for example
38   * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
39   * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
40   * could be lowered to 36x36 for processors with 32k L1 cache.
41   * </p>
42   * <p>
43   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
44   * at right hand side and bottom side which may be smaller to fit matrix dimensions. The square
45   * blocks are flattened in row major order in single dimension arrays which are therefore
46   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
47   * organized in row major order.
48   * </p>
49   * <p>
50   * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
51   * Block 0 would be a double[2704] array holding the upper left 52x52 square, block 1 would be
52   * a double[416] array holding the upper right 52x8 rectangle, block 2 would be a double[2496]
53   * array holding the lower left 48x52 rectangle and block 3 would be a double[384] array
54   * holding the lower right 48x8 rectangle.
55   * </p>
56   * <p>
57   * The layout complexity overhead versus simple mapping of matrices to java
58   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
59   * to up to 3-fold improvements for matrices of moderate to large size.
60   * </p>
61   * @version $Revision: 783741 $ $Date: 2009-06-11 08:35:42 -0400 (Thu, 11 Jun 2009) $
62   * @since 2.0
63   */
64  public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
65      
66      /** Serializable version identifier */
67      private static final long serialVersionUID = 4991895511313664478L;
68  
69      /** Block size. */
70      public static final int BLOCK_SIZE = 52;
71  
72      /** Blocks of matrix entries. */
73      private final double blocks[][];
74  
75      /** Number of rows of the matrix. */
76      private final int rows;
77  
78      /** Number of columns of the matrix. */
79      private final int columns;
80  
81      /** Number of block rows of the matrix. */
82      private final int blockRows;
83  
84      /** Number of block columns of the matrix. */
85      private final int blockColumns;
86  
87      /**
88       * Create a new matrix with the supplied row and column dimensions.
89       *
90       * @param rows  the number of rows in the new matrix
91       * @param columns  the number of columns in the new matrix
92       * @throws IllegalArgumentException if row or column dimension is not
93       *  positive
94       */
95      public BlockRealMatrix(final int rows, final int columns)
96          throws IllegalArgumentException {
97  
98          super(rows, columns);
99          this.rows    = rows;
100         this.columns = columns;
101 
102         // number of blocks
103         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
104         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
105 
106         // allocate storage blocks, taking care of smaller ones at right and bottom
107         blocks = createBlocksLayout(rows, columns);
108 
109     }
110 
111     /**
112      * Create a new dense matrix copying entries from raw layout data.
113      * <p>The input array <em>must</em> already be in raw layout.</p>
114      * <p>Calling this constructor is equivalent to call:
115      * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
116      *                                   toBlocksLayout(rawData), false);</pre>
117      * </p>
118      * @param rawData data for new matrix, in raw layout
119      *
120      * @exception IllegalArgumentException if <code>blockData</code> shape is
121      * inconsistent with block layout
122      * @see #BlockRealMatrix(int, int, double[][], boolean)
123      */
124     public BlockRealMatrix(final double[][] rawData)
125         throws IllegalArgumentException {
126         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
127     }
128 
129     /**
130      * Create a new dense matrix copying entries from block layout data.
131      * <p>The input array <em>must</em> already be in blocks layout.</p>
132      * @param rows  the number of rows in the new matrix
133      * @param columns  the number of columns in the new matrix
134      * @param blockData data for new matrix
135      * @param copyArray if true, the input array will be copied, otherwise
136      * it will be referenced
137      *
138      * @exception IllegalArgumentException if <code>blockData</code> shape is
139      * inconsistent with block layout
140      * @see #createBlocksLayout(int, int)
141      * @see #toBlocksLayout(double[][])
142      * @see #BlockRealMatrix(double[][])
143      */
144     public BlockRealMatrix(final int rows, final int columns,
145                            final double[][] blockData, final boolean copyArray)
146         throws IllegalArgumentException {
147 
148         super(rows, columns);
149         this.rows    = rows;
150         this.columns = columns;
151 
152         // number of blocks
153         blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
154         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
155 
156         if (copyArray) {
157             // allocate storage blocks, taking care of smaller ones at right and bottom
158             blocks = new double[blockRows * blockColumns][];
159         } else {
160             // reference existing array
161             blocks = blockData;
162         }
163 
164         int index = 0;
165         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
166             final int iHeight = blockHeight(iBlock);
167             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
168                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
169                     throw MathRuntimeException.createIllegalArgumentException(
170                             "wrong array shape (block length = {0}, expected {1})",
171                             blockData[index].length, iHeight * blockWidth(jBlock));
172                 }
173                 if (copyArray) {
174                     blocks[index] = blockData[index].clone();
175                 }
176             }
177         }
178 
179     }
180 
181     /**
182      * Convert a data array from raw layout to blocks layout.
183      * <p>
184      * Raw layout is the straightforward layout where element at row i and
185      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
186      * is the layout used in {@link BlockRealMatrix} instances, where the matrix
187      * is split in square blocks (except at right and bottom side where blocks may
188      * be rectangular to fit matrix size) and each block is stored in a flattened
189      * one-dimensional array.
190      * </p>
191      * <p>
192      * This method creates an array in blocks layout from an input array in raw layout.
193      * It can be used to provide the array argument of the {@link
194      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
195      * </p>
196      * @param rawData data array in raw layout
197      * @return a new data array containing the same entries but in blocks layout
198      * @exception IllegalArgumentException if <code>rawData</code> is not rectangular
199      *  (not all rows have the same length)
200      * @see #createBlocksLayout(int, int)
201      * @see #BlockRealMatrix(int, int, double[][], boolean)
202      */
203     public static double[][] toBlocksLayout(final double[][] rawData)
204         throws IllegalArgumentException {
205 
206         final int rows         = rawData.length;
207         final int columns      = rawData[0].length;
208         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
209         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
210 
211         // safety checks
212         for (int i = 0; i < rawData.length; ++i) {
213             final int length = rawData[i].length;
214             if (length != columns) {
215                 throw MathRuntimeException.createIllegalArgumentException(
216                         "some rows have length {0} while others have length {1}",
217                         columns, length); 
218             }
219         }
220 
221         // convert array
222         final double[][] blocks = new double[blockRows * blockColumns][];
223         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
224             final int pStart  = iBlock * BLOCK_SIZE;
225             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
226             final int iHeight = pEnd - pStart;
227             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
228                 final int qStart = jBlock * BLOCK_SIZE;
229                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
230                 final int jWidth = qEnd - qStart;
231 
232                 // allocate new block
233                 final double[] block = new double[iHeight * jWidth];
234                 blocks[blockIndex] = block;
235 
236                 // copy data
237                 for (int p = pStart, index = 0; p < pEnd; ++p, index += jWidth) {
238                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
239                 }
240 
241             }
242         }
243 
244         return blocks;
245 
246     }
247 
248     /**
249      * Create a data array in blocks layout.
250      * <p>
251      * This method can be used to create the array argument of the {@link
252      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
253      * </p>
254      * @param rows  the number of rows in the new matrix
255      * @param columns  the number of columns in the new matrix
256      * @return a new data array in blocks layout
257      * @see #toBlocksLayout(double[][])
258      * @see #BlockRealMatrix(int, int, double[][], boolean)
259      */
260     public static double[][] createBlocksLayout(final int rows, final int columns) {
261 
262         final int blockRows    = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
263         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
264 
265         final double[][] blocks = new double[blockRows * blockColumns][];
266         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
267             final int pStart  = iBlock * BLOCK_SIZE;
268             final int pEnd    = Math.min(pStart + BLOCK_SIZE, rows);
269             final int iHeight = pEnd - pStart;
270             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
271                 final int qStart = jBlock * BLOCK_SIZE;
272                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
273                 final int jWidth = qEnd - qStart;
274                 blocks[blockIndex] = new double[iHeight * jWidth];
275             }
276         }
277 
278         return blocks;
279 
280     }
281 
282     /** {@inheritDoc} */
283     @Override
284     public BlockRealMatrix createMatrix(final int rowDimension, final int columnDimension)
285         throws IllegalArgumentException {
286         return new BlockRealMatrix(rowDimension, columnDimension);
287     }
288 
289     /** {@inheritDoc} */
290     @Override
291     public BlockRealMatrix copy() {
292 
293         // create an empty matrix
294         BlockRealMatrix copied = new BlockRealMatrix(rows, columns);
295 
296         // copy the blocks
297         for (int i = 0; i < blocks.length; ++i) {
298             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
299         }
300 
301         return copied;
302 
303     }
304 
305     /** {@inheritDoc} */
306     @Override
307     public BlockRealMatrix add(final RealMatrix m)
308         throws IllegalArgumentException {
309         try {
310             return add((BlockRealMatrix) m);
311         } catch (ClassCastException cce) {
312 
313             // safety check
314             MatrixUtils.checkAdditionCompatible(this, m);
315 
316             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
317 
318             // perform addition block-wise, to ensure good cache behavior
319             int blockIndex = 0;
320             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
321                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
322 
323                     // perform addition on the current block
324                     final double[] outBlock = out.blocks[blockIndex];
325                     final double[] tBlock   = blocks[blockIndex];
326                     final int      pStart   = iBlock * BLOCK_SIZE;
327                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
328                     final int      qStart   = jBlock * BLOCK_SIZE;
329                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
330                     for (int p = pStart, k = 0; p < pEnd; ++p) {
331                         for (int q = qStart; q < qEnd; ++q, ++k) {
332                             outBlock[k] = tBlock[k] + m.getEntry(p, q);
333                         }
334                     }
335 
336                     // go to next block
337                     ++blockIndex;
338 
339                 }
340             }
341 
342             return out;
343 
344         }
345     }
346 
347     /**
348      * Compute the sum of this and <code>m</code>.
349      *
350      * @param m    matrix to be added
351      * @return     this + m
352      * @throws  IllegalArgumentException if m is not the same size as this
353      */
354     public BlockRealMatrix add(final BlockRealMatrix m)
355         throws IllegalArgumentException {
356 
357         // safety check
358         MatrixUtils.checkAdditionCompatible(this, m);
359 
360         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
361 
362         // perform addition block-wise, to ensure good cache behavior
363         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
364             final double[] outBlock = out.blocks[blockIndex];
365             final double[] tBlock   = blocks[blockIndex];
366             final double[] mBlock   = m.blocks[blockIndex];
367             for (int k = 0; k < outBlock.length; ++k) {
368                 outBlock[k] = tBlock[k] + mBlock[k];
369             }
370         }
371 
372         return out;
373 
374     }
375 
376     /** {@inheritDoc} */
377     @Override
378     public BlockRealMatrix subtract(final RealMatrix m)
379         throws IllegalArgumentException {
380         try {
381             return subtract((BlockRealMatrix) m);
382         } catch (ClassCastException cce) {
383 
384             // safety check
385             MatrixUtils.checkSubtractionCompatible(this, m);
386 
387             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
388 
389             // perform subtraction block-wise, to ensure good cache behavior
390             int blockIndex = 0;
391             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
392                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
393 
394                     // perform subtraction on the current block
395                     final double[] outBlock = out.blocks[blockIndex];
396                     final double[] tBlock   = blocks[blockIndex];
397                     final int      pStart   = iBlock * BLOCK_SIZE;
398                     final int      pEnd     = Math.min(pStart + BLOCK_SIZE, rows);
399                     final int      qStart   = jBlock * BLOCK_SIZE;
400                     final int      qEnd     = Math.min(qStart + BLOCK_SIZE, columns);
401                     for (int p = pStart, k = 0; p < pEnd; ++p) {
402                         for (int q = qStart; q < qEnd; ++q, ++k) {
403                             outBlock[k] = tBlock[k] - m.getEntry(p, q);
404                         }
405                     }
406 
407                     // go to next block
408                     ++blockIndex;
409 
410                 }
411             }
412 
413             return out;
414 
415         }
416     }
417 
418     /**
419      * Compute this minus <code>m</code>.
420      *
421      * @param m    matrix to be subtracted
422      * @return     this - m
423      * @throws  IllegalArgumentException if m is not the same size as this
424      */
425     public BlockRealMatrix subtract(final BlockRealMatrix m)
426         throws IllegalArgumentException {
427 
428         // safety check
429         MatrixUtils.checkSubtractionCompatible(this, m);
430 
431         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
432 
433         // perform subtraction block-wise, to ensure good cache behavior
434         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
435             final double[] outBlock = out.blocks[blockIndex];
436             final double[] tBlock   = blocks[blockIndex];
437             final double[] mBlock   = m.blocks[blockIndex];
438             for (int k = 0; k < outBlock.length; ++k) {
439                 outBlock[k] = tBlock[k] - mBlock[k];
440             }
441         }
442 
443         return out;
444 
445     }
446 
447     /** {@inheritDoc} */
448     @Override
449     public BlockRealMatrix scalarAdd(final double d)
450         throws IllegalArgumentException {
451 
452         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
453 
454         // perform subtraction block-wise, to ensure good cache behavior
455         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
456             final double[] outBlock = out.blocks[blockIndex];
457             final double[] tBlock   = blocks[blockIndex];
458             for (int k = 0; k < outBlock.length; ++k) {
459                 outBlock[k] = tBlock[k] + d;
460             }
461         }
462 
463         return out;
464 
465     }
466 
467     /** {@inheritDoc} */
468     @Override
469     public RealMatrix scalarMultiply(final double d)
470         throws IllegalArgumentException {
471 
472         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
473 
474         // perform subtraction block-wise, to ensure good cache behavior
475         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
476             final double[] outBlock = out.blocks[blockIndex];
477             final double[] tBlock   = blocks[blockIndex];
478             for (int k = 0; k < outBlock.length; ++k) {
479                 outBlock[k] = tBlock[k] * d;
480             }
481         }
482 
483         return out;
484 
485     }
486 
487     /** {@inheritDoc} */
488     @Override
489     public BlockRealMatrix multiply(final RealMatrix m)
490         throws IllegalArgumentException {
491         try {
492             return multiply((BlockRealMatrix) m);
493         } catch (ClassCastException cce) {
494 
495             // safety check
496             MatrixUtils.checkMultiplicationCompatible(this, m);
497 
498             final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());
499 
500             // perform multiplication block-wise, to ensure good cache behavior
501             int blockIndex = 0;
502             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
503 
504                 final int pStart = iBlock * BLOCK_SIZE;
505                 final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
506 
507                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
508 
509                     final int qStart = jBlock * BLOCK_SIZE;
510                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, m.getColumnDimension());
511 
512                     // select current block
513                     final double[] outBlock = out.blocks[blockIndex];
514 
515                     // perform multiplication on current block
516                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
517                         final int kWidth      = blockWidth(kBlock);
518                         final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
519                         final int rStart      = kBlock * BLOCK_SIZE;
520                         for (int p = pStart, k = 0; p < pEnd; ++p) {
521                             final int lStart = (p - pStart) * kWidth;
522                             final int lEnd   = lStart + kWidth;
523                             for (int q = qStart; q < qEnd; ++q) {
524                                 double sum = 0;
525                                 for (int l = lStart, r = rStart; l < lEnd; ++l, ++r) {
526                                     sum += tBlock[l] * m.getEntry(r, q);
527                                 }
528                                 outBlock[k++] += sum;
529                             }
530                         }
531                     }
532 
533                     // go to next block
534                     ++blockIndex;
535 
536                 }
537             }
538 
539             return out;
540 
541         }
542     }
543 
544     /**
545      * Returns the result of postmultiplying this by m.
546      *
547      * @param m    matrix to postmultiply by
548      * @return     this * m
549      * @throws     IllegalArgumentException
550      *             if columnDimension(this) != rowDimension(m)
551      */
552     public BlockRealMatrix multiply(BlockRealMatrix m) throws IllegalArgumentException {
553 
554         // safety check
555         MatrixUtils.checkMultiplicationCompatible(this, m);
556 
557         final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);
558 
559         // perform multiplication block-wise, to ensure good cache behavior
560         int blockIndex = 0;
561         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
562 
563             final int pStart = iBlock * BLOCK_SIZE;
564             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
565 
566             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
567                 final int jWidth = out.blockWidth(jBlock);
568                 final int jWidth2 = jWidth  + jWidth;
569                 final int jWidth3 = jWidth2 + jWidth;
570                 final int jWidth4 = jWidth3 + jWidth;
571 
572                 // select current block
573                 final double[] outBlock = out.blocks[blockIndex];
574 
575                 // perform multiplication on current block
576                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
577                     final int kWidth = blockWidth(kBlock);
578                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
579                     final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
580                     for (int p = pStart, k = 0; p < pEnd; ++p) {
581                         final int lStart = (p - pStart) * kWidth;
582                         final int lEnd   = lStart + kWidth;
583                         for (int nStart = 0; nStart < jWidth; ++nStart) {
584                             double sum = 0;
585                             int l = lStart;
586                             int n = nStart;
587                             while (l < lEnd - 3) {
588                                 sum += tBlock[l] * mBlock[n] +
589                                        tBlock[l + 1] * mBlock[n + jWidth] +
590                                        tBlock[l + 2] * mBlock[n + jWidth2] +
591                                        tBlock[l + 3] * mBlock[n + jWidth3];
592                                 l += 4;
593                                 n += jWidth4;
594                             }
595                             while (l < lEnd) {
596                                 sum += tBlock[l++] * mBlock[n];
597                                 n += jWidth;
598                             }
599                             outBlock[k++] += sum;
600                         }
601                     }
602                 }
603 
604                 // go to next block
605                 ++blockIndex;
606 
607             }
608         }
609 
610         return out;
611 
612     }
613 
614     /** {@inheritDoc} */
615     @Override
616     public double[][] getData() {
617 
618         final double[][] data = new double[getRowDimension()][getColumnDimension()];
619         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
620 
621         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
622             final int pStart = iBlock * BLOCK_SIZE;
623             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
624             int regularPos   = 0;
625             int lastPos      = 0;
626             for (int p = pStart; p < pEnd; ++p) {
627                 final double[] dataP = data[p];
628                 int blockIndex = iBlock * blockColumns;
629                 int dataPos    = 0;
630                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
631                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
632                     dataPos += BLOCK_SIZE;
633                 }
634                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
635                 regularPos += BLOCK_SIZE;
636                 lastPos    += lastColumns;
637             }
638         }
639 
640         return data;
641         
642     }
643 
644     /** {@inheritDoc} */
645     @Override
646     public double getNorm() {
647         final double[] colSums = new double[BLOCK_SIZE];
648         double maxColSum = 0;
649         for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
650             final int jWidth = blockWidth(jBlock);
651             Arrays.fill(colSums, 0, jWidth, 0.0);
652             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
653                 final int iHeight = blockHeight(iBlock);
654                 final double[] block = blocks[iBlock * blockColumns + jBlock];
655                 for (int j = 0; j < jWidth; ++j) {
656                     double sum = 0;
657                     for (int i = 0; i < iHeight; ++i) {
658                         sum += Math.abs(block[i * jWidth + j]);
659                     }
660                     colSums[j] += sum;
661                 }
662             }
663             for (int j = 0; j < jWidth; ++j) {
664                 maxColSum = Math.max(maxColSum, colSums[j]);
665             }
666         }
667         return maxColSum;
668     }
669     
670     /** {@inheritDoc} */
671     @Override
672     public double getFrobeniusNorm() {
673         double sum2 = 0;
674         for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
675             for (final double entry : blocks[blockIndex]) {
676                 sum2 += entry * entry;
677             }
678         }
679         return Math.sqrt(sum2);
680     }
681 
682     /** {@inheritDoc} */
683     @Override
684     public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
685                                    final int startColumn, final int endColumn)
686         throws MatrixIndexException {
687 
688         // safety checks
689         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
690 
691         // create the output matrix
692         final BlockRealMatrix out =
693             new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);
694 
695         // compute blocks shifts
696         final int blockStartRow    = startRow    / BLOCK_SIZE;
697         final int rowsShift        = startRow    % BLOCK_SIZE;
698         final int blockStartColumn = startColumn / BLOCK_SIZE;
699         final int columnsShift     = startColumn % BLOCK_SIZE;
700 
701         // perform extraction block-wise, to ensure good cache behavior
702         for (int iBlock = 0, pBlock = blockStartRow; iBlock < out.blockRows; ++iBlock, ++pBlock) {
703             final int iHeight = out.blockHeight(iBlock);
704             for (int jBlock = 0, qBlock = blockStartColumn; jBlock < out.blockColumns; ++jBlock, ++qBlock) {
705                 final int jWidth = out.blockWidth(jBlock);
706 
707                 // handle one block of the output matrix
708                 final int      outIndex = iBlock * out.blockColumns + jBlock;
709                 final double[] outBlock = out.blocks[outIndex];
710                 final int      index    = pBlock * blockColumns + qBlock;
711                 final int      width    = blockWidth(qBlock);
712 
713                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
714                 final int widthExcess  = jWidth + columnsShift - BLOCK_SIZE;
715                 if (heightExcess > 0) {
716                     // the submatrix block spans on two blocks rows from the original matrix
717                     if (widthExcess > 0) {
718                         // the submatrix block spans on two blocks columns from the original matrix
719                         final int width2 = blockWidth(qBlock + 1);
720                         copyBlockPart(blocks[index], width,
721                                       rowsShift, BLOCK_SIZE,
722                                       columnsShift, BLOCK_SIZE,
723                                       outBlock, jWidth, 0, 0);
724                         copyBlockPart(blocks[index + 1], width2,
725                                       rowsShift, BLOCK_SIZE,
726                                       0, widthExcess,
727                                       outBlock, jWidth, 0, jWidth - widthExcess);
728                         copyBlockPart(blocks[index + blockColumns], width,
729                                       0, heightExcess,
730                                       columnsShift, BLOCK_SIZE,
731                                       outBlock, jWidth, iHeight - heightExcess, 0);
732                         copyBlockPart(blocks[index + blockColumns + 1], width2,
733                                       0, heightExcess,
734                                       0, widthExcess,
735                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
736                     } else {
737                         // the submatrix block spans on one block column from the original matrix
738                         copyBlockPart(blocks[index], width,
739                                       rowsShift, BLOCK_SIZE,
740                                       columnsShift, jWidth + columnsShift,
741                                       outBlock, jWidth, 0, 0);
742                         copyBlockPart(blocks[index + blockColumns], width,
743                                       0, heightExcess,
744                                       columnsShift, jWidth + columnsShift,
745                                       outBlock, jWidth, iHeight - heightExcess, 0);
746                     }
747                 } else {
748                     // the submatrix block spans on one block row from the original matrix
749                     if (widthExcess > 0) {
750                         // the submatrix block spans on two blocks columns from the original matrix
751                         final int width2 = blockWidth(qBlock + 1);
752                         copyBlockPart(blocks[index], width,
753                                       rowsShift, iHeight + rowsShift,
754                                       columnsShift, BLOCK_SIZE,
755                                       outBlock, jWidth, 0, 0);
756                         copyBlockPart(blocks[index + 1], width2,
757                                       rowsShift, iHeight + rowsShift,
758                                       0, widthExcess,
759                                       outBlock, jWidth, 0, jWidth - widthExcess);
760                     } else {
761                         // the submatrix block spans on one block column from the original matrix
762                         copyBlockPart(blocks[index], width,
763                                       rowsShift, iHeight + rowsShift,
764                                       columnsShift, jWidth + columnsShift,
765                                       outBlock, jWidth, 0, 0);
766                     }
767                }
768 
769             }
770         }
771 
772         return out;
773 
774     }
775 
776     /**
777      * Copy a part of a block into another one
778      * <p>This method can be called only when the specified part fits in both
779      * blocks, no verification is done here.</p>
780      * @param srcBlock source block
781      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
782      * @param srcStartRow start row in the source block
783      * @param srcEndRow end row (exclusive) in the source block
784      * @param srcStartColumn start column in the source block
785      * @param srcEndColumn end column (exclusive) in the source block
786      * @param dstBlock destination block
787      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
788      * @param dstStartRow start row in the destination block
789      * @param dstStartColumn start column in the destination block
790      */
791     private void copyBlockPart(final double[] srcBlock, final int srcWidth,
792                                final int srcStartRow, final int srcEndRow,
793                                final int srcStartColumn, final int srcEndColumn,
794                                final double[] dstBlock, final int dstWidth,
795                                final int dstStartRow, final int dstStartColumn) {
796         final int length = srcEndColumn - srcStartColumn;
797         int srcPos = srcStartRow * srcWidth + srcStartColumn;
798         int dstPos = dstStartRow * dstWidth + dstStartColumn;
799         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
800             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
801             srcPos += srcWidth;
802             dstPos += dstWidth;
803         }
804     }
805 
806     /** {@inheritDoc} */
807     @Override
808     public void setSubMatrix(final double[][] subMatrix, final int row, final int column)
809         throws MatrixIndexException {
810 
811         // safety checks
812         final int refLength = subMatrix[0].length;
813         if (refLength < 1) {
814             throw MathRuntimeException.createIllegalArgumentException("matrix must have at least one column");             
815         }
816         final int endRow    = row + subMatrix.length - 1;
817         final int endColumn = column + refLength - 1;
818         MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
819         for (final double[] subRow : subMatrix) {
820             if (subRow.length != refLength) {
821                 throw MathRuntimeException.createIllegalArgumentException(
822                         "some rows have length {0} while others have length {1}",
823                         refLength, subRow.length); 
824             }
825         }
826 
827         // compute blocks bounds
828         final int blockStartRow    = row / BLOCK_SIZE;
829         final int blockEndRow      = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
830         final int blockStartColumn = column / BLOCK_SIZE;
831         final int blockEndColumn   = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
832 
833         // perform copy block-wise, to ensure good cache behavior
834         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
835             final int iHeight  = blockHeight(iBlock);
836             final int firstRow = iBlock * BLOCK_SIZE;
837             final int iStart   = Math.max(row,    firstRow);
838             final int iEnd     = Math.min(endRow + 1, firstRow + iHeight);
839 
840             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
841                 final int jWidth      = blockWidth(jBlock);
842                 final int firstColumn = jBlock * BLOCK_SIZE;
843                 final int jStart      = Math.max(column,    firstColumn);
844                 final int jEnd        = Math.min(endColumn + 1, firstColumn + jWidth);
845                 final int jLength     = jEnd - jStart;
846 
847                 // handle one block, row by row
848                 final double[] block = blocks[iBlock * blockColumns + jBlock];
849                 for (int i = iStart; i < iEnd; ++i) {
850                     System.arraycopy(subMatrix[i - row], jStart - column,
851                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
852                                      jLength);
853                 }
854 
855             }
856         }
857     }
858 
859     /** {@inheritDoc} */
860     @Override
861     public BlockRealMatrix getRowMatrix(final int row)
862         throws MatrixIndexException {
863 
864         MatrixUtils.checkRowIndex(this, row);
865         final BlockRealMatrix out = new BlockRealMatrix(1, columns);
866 
867         // perform copy block-wise, to ensure good cache behavior
868         final int iBlock  = row / BLOCK_SIZE;
869         final int iRow    = row - iBlock * BLOCK_SIZE;
870         int outBlockIndex = 0;
871         int outIndex      = 0;
872         double[] outBlock = out.blocks[outBlockIndex];
873         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
874             final int jWidth     = blockWidth(jBlock);
875             final double[] block = blocks[iBlock * blockColumns + jBlock];
876             final int available  = outBlock.length - outIndex;
877             if (jWidth > available) {
878                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
879                 outBlock = out.blocks[++outBlockIndex];
880                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
881                 outIndex = jWidth - available;
882             } else {
883                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
884                 outIndex += jWidth;
885             }
886         }
887 
888         return out;
889 
890     }
891 
892     /** {@inheritDoc} */
893     @Override
894     public void setRowMatrix(final int row, final RealMatrix matrix)
895         throws MatrixIndexException, InvalidMatrixException {
896         try {
897             setRowMatrix(row, (BlockRealMatrix) matrix);
898         } catch (ClassCastException cce) {
899             super.setRowMatrix(row, matrix);
900         }
901     }
902 
903     /**
904      * Sets the entries in row number <code>row</code>
905      * as a row matrix.  Row indices start at 0.
906      *
907      * @param row the row to be set
908      * @param matrix row matrix (must have one row and the same number of columns
909      * as the instance)
910      * @throws MatrixIndexException if the specified row index is invalid
911      * @throws InvalidMatrixException if the matrix dimensions do not match one
912      * instance row
913      */
914     public void setRowMatrix(final int row, final BlockRealMatrix matrix)
915         throws MatrixIndexException, InvalidMatrixException {
916 
917         MatrixUtils.checkRowIndex(this, row);
918         final int nCols = getColumnDimension();
919         if ((matrix.getRowDimension() != 1) ||
920             (matrix.getColumnDimension() != nCols)) {
921             throw new InvalidMatrixException(
922                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
923                     matrix.getRowDimension(), matrix.getColumnDimension(),
924                     1, nCols);
925         }
926 
927         // perform copy block-wise, to ensure good cache behavior
928         final int iBlock = row / BLOCK_SIZE;
929         final int iRow   = row - iBlock * BLOCK_SIZE;
930         int mBlockIndex  = 0;
931         int mIndex       = 0;
932         double[] mBlock  = matrix.blocks[mBlockIndex];
933         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
934             final int jWidth     = blockWidth(jBlock);
935             final double[] block = blocks[iBlock * blockColumns + jBlock];
936             final int available  = mBlock.length - mIndex;
937             if (jWidth > available) {
938                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
939                 mBlock = matrix.blocks[++mBlockIndex];
940                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
941                 mIndex = jWidth - available;
942             } else {
943                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
944                 mIndex += jWidth;
945            }
946         }
947 
948     }
949     
950     /** {@inheritDoc} */
951     @Override
952     public BlockRealMatrix getColumnMatrix(final int column)
953         throws MatrixIndexException {
954 
955         MatrixUtils.checkColumnIndex(this, column);
956         final BlockRealMatrix out = new BlockRealMatrix(rows, 1);
957 
958         // perform copy block-wise, to ensure good cache behavior
959         final int jBlock  = column / BLOCK_SIZE;
960         final int jColumn = column - jBlock * BLOCK_SIZE;
961         final int jWidth  = blockWidth(jBlock);
962         int outBlockIndex = 0;
963         int outIndex      = 0;
964         double[] outBlock = out.blocks[outBlockIndex];
965         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
966             final int iHeight = blockHeight(iBlock);
967             final double[] block = blocks[iBlock * blockColumns + jBlock];
968             for (int i = 0; i < iHeight; ++i) {
969                 if (outIndex >= outBlock.length) {
970                     outBlock = out.blocks[++outBlockIndex];
971                     outIndex = 0;
972                 }
973                 outBlock[outIndex++] = block[i * jWidth + jColumn];
974             }
975         }
976 
977         return out;
978 
979     }
980 
981     /** {@inheritDoc} */
982     @Override
983     public void setColumnMatrix(final int column, final RealMatrix matrix)
984         throws MatrixIndexException, InvalidMatrixException {
985         try {
986             setColumnMatrix(column, (BlockRealMatrix) matrix);
987         } catch (ClassCastException cce) {
988             super.setColumnMatrix(column, matrix);
989         }
990     }
991 
992     /**
993      * Sets the entries in column number <code>column</code>
994      * as a column matrix.  Column indices start at 0.
995      *
996      * @param column the column to be set
997      * @param matrix column matrix (must have one column and the same number of rows
998      * as the instance)
999      * @throws MatrixIndexException if the specified column index is invalid
1000      * @throws InvalidMatrixException if the matrix dimensions do not match one
1001      * instance column
1002      */
1003     void setColumnMatrix(final int column, final BlockRealMatrix matrix)
1004         throws MatrixIndexException, InvalidMatrixException {
1005 
1006         MatrixUtils.checkColumnIndex(this, column);
1007         final int nRows = getRowDimension();
1008         if ((matrix.getRowDimension() != nRows) ||
1009             (matrix.getColumnDimension() != 1)) {
1010             throw new InvalidMatrixException(
1011                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1012                     matrix.getRowDimension(), matrix.getColumnDimension(),
1013                     nRows, 1);
1014         }
1015 
1016         // perform copy block-wise, to ensure good cache behavior
1017         final int jBlock  = column / BLOCK_SIZE;
1018         final int jColumn = column - jBlock * BLOCK_SIZE;
1019         final int jWidth  = blockWidth(jBlock);
1020         int mBlockIndex = 0;
1021         int mIndex      = 0;
1022         double[] mBlock = matrix.blocks[mBlockIndex];
1023         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1024             final int iHeight = blockHeight(iBlock);
1025             final double[] block = blocks[iBlock * blockColumns + jBlock];
1026             for (int i = 0; i < iHeight; ++i) {
1027                 if (mIndex >= mBlock.length) {
1028                     mBlock = matrix.blocks[++mBlockIndex];
1029                     mIndex = 0;
1030                 }
1031                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1032             }
1033         }
1034 
1035     }
1036 
1037     /** {@inheritDoc} */
1038     @Override
1039     public RealVector getRowVector(final int row)
1040         throws MatrixIndexException {
1041 
1042         MatrixUtils.checkRowIndex(this, row);
1043         final double[] outData = new double[columns];
1044 
1045         // perform copy block-wise, to ensure good cache behavior
1046         final int iBlock  = row / BLOCK_SIZE;
1047         final int iRow    = row - iBlock * BLOCK_SIZE;
1048         int outIndex      = 0;
1049         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1050             final int jWidth     = blockWidth(jBlock);
1051             final double[] block = blocks[iBlock * blockColumns + jBlock];
1052             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1053             outIndex += jWidth;
1054         }
1055 
1056         return new ArrayRealVector(outData, false);
1057 
1058     }
1059 
1060     /** {@inheritDoc} */
1061     @Override
1062     public void setRowVector(final int row, final RealVector vector)
1063         throws MatrixIndexException, InvalidMatrixException {
1064         try {
1065             setRow(row, ((ArrayRealVector) vector).getDataRef());
1066         } catch (ClassCastException cce) {
1067             super.setRowVector(row, vector);
1068         }
1069     }
1070 
1071     /** {@inheritDoc} */
1072     @Override
1073     public RealVector getColumnVector(final int column)
1074         throws MatrixIndexException {
1075 
1076         MatrixUtils.checkColumnIndex(this, column);
1077         final double[] outData = new double[rows];
1078 
1079         // perform copy block-wise, to ensure good cache behavior
1080         final int jBlock  = column / BLOCK_SIZE;
1081         final int jColumn = column - jBlock * BLOCK_SIZE;
1082         final int jWidth  = blockWidth(jBlock);
1083         int outIndex      = 0;
1084         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1085             final int iHeight = blockHeight(iBlock);
1086             final double[] block = blocks[iBlock * blockColumns + jBlock];
1087             for (int i = 0; i < iHeight; ++i) {
1088                 outData[outIndex++] = block[i * jWidth + jColumn];
1089             }
1090         }
1091 
1092         return new ArrayRealVector(outData, false);
1093 
1094     }
1095 
1096     /** {@inheritDoc} */
1097     @Override
1098     public void setColumnVector(final int column, final RealVector vector)
1099         throws MatrixIndexException, InvalidMatrixException {
1100         try {
1101             setColumn(column, ((ArrayRealVector) vector).getDataRef());
1102         } catch (ClassCastException cce) {
1103             super.setColumnVector(column, vector);
1104         }
1105     }
1106 
1107     /** {@inheritDoc} */
1108     @Override
1109     public double[] getRow(final int row)
1110         throws MatrixIndexException {
1111 
1112         MatrixUtils.checkRowIndex(this, row);
1113         final double[] out = new double[columns];
1114 
1115         // perform copy block-wise, to ensure good cache behavior
1116         final int iBlock  = row / BLOCK_SIZE;
1117         final int iRow    = row - iBlock * BLOCK_SIZE;
1118         int outIndex      = 0;
1119         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1120             final int jWidth     = blockWidth(jBlock);
1121             final double[] block = blocks[iBlock * blockColumns + jBlock];
1122             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1123             outIndex += jWidth;
1124         }
1125 
1126         return out;
1127 
1128     }
1129 
1130     /** {@inheritDoc} */
1131     @Override
1132     public void setRow(final int row, final double[] array)
1133         throws MatrixIndexException, InvalidMatrixException {
1134 
1135         MatrixUtils.checkRowIndex(this, row);
1136         final int nCols = getColumnDimension();
1137         if (array.length != nCols) {
1138             throw new InvalidMatrixException(
1139                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1140                     1, array.length, 1, nCols);
1141         }
1142 
1143         // perform copy block-wise, to ensure good cache behavior
1144         final int iBlock  = row / BLOCK_SIZE;
1145         final int iRow    = row - iBlock * BLOCK_SIZE;
1146         int outIndex      = 0;
1147         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1148             final int jWidth     = blockWidth(jBlock);
1149             final double[] block = blocks[iBlock * blockColumns + jBlock];
1150             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1151             outIndex += jWidth;
1152         }
1153 
1154     }
1155 
1156     /** {@inheritDoc} */
1157     @Override
1158     public double[] getColumn(final int column)
1159         throws MatrixIndexException {
1160 
1161         MatrixUtils.checkColumnIndex(this, column);
1162         final double[] out = new double[rows];
1163 
1164         // perform copy block-wise, to ensure good cache behavior
1165         final int jBlock  = column / BLOCK_SIZE;
1166         final int jColumn = column - jBlock * BLOCK_SIZE;
1167         final int jWidth  = blockWidth(jBlock);
1168         int outIndex      = 0;
1169         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1170             final int iHeight = blockHeight(iBlock);
1171             final double[] block = blocks[iBlock * blockColumns + jBlock];
1172             for (int i = 0; i < iHeight; ++i) {
1173                 out[outIndex++] = block[i * jWidth + jColumn];
1174             }
1175         }
1176 
1177         return out;
1178 
1179     }
1180 
1181     /** {@inheritDoc} */
1182     @Override
1183     public void setColumn(final int column, final double[] array)
1184         throws MatrixIndexException, InvalidMatrixException {
1185 
1186         MatrixUtils.checkColumnIndex(this, column);
1187         final int nRows = getRowDimension();
1188         if (array.length != nRows) {
1189             throw new InvalidMatrixException(
1190                     "dimensions mismatch: got {0}x{1} but expected {2}x{3}",
1191                     array.length, 1, nRows, 1);
1192         }
1193 
1194         // perform copy block-wise, to ensure good cache behavior
1195         final int jBlock  = column / BLOCK_SIZE;
1196         final int jColumn = column - jBlock * BLOCK_SIZE;
1197         final int jWidth  = blockWidth(jBlock);
1198         int outIndex      = 0;
1199         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1200             final int iHeight = blockHeight(iBlock);
1201             final double[] block = blocks[iBlock * blockColumns + jBlock];
1202             for (int i = 0; i < iHeight; ++i) {
1203                 block[i * jWidth + jColumn] = array[outIndex++];
1204             }
1205         }
1206 
1207     }
1208 
1209     /** {@inheritDoc} */
1210     @Override
1211     public double getEntry(final int row, final int column)
1212         throws MatrixIndexException {
1213         try {
1214             final int iBlock = row    / BLOCK_SIZE;
1215             final int jBlock = column / BLOCK_SIZE;
1216             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1217                                (column - jBlock * BLOCK_SIZE);
1218             return blocks[iBlock * blockColumns + jBlock][k];
1219         } catch (ArrayIndexOutOfBoundsException e) {
1220             throw new MatrixIndexException(
1221                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1222                     row, column, getRowDimension(), getColumnDimension());
1223         }
1224     }
1225 
1226     /** {@inheritDoc} */
1227     @Override
1228     public void setEntry(final int row, final int column, final double value)
1229         throws MatrixIndexException {
1230         try {
1231             final int iBlock = row    / BLOCK_SIZE;
1232             final int jBlock = column / BLOCK_SIZE;
1233             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1234                                (column - jBlock * BLOCK_SIZE);
1235             blocks[iBlock * blockColumns + jBlock][k] = value;
1236         } catch (ArrayIndexOutOfBoundsException e) {
1237             throw new MatrixIndexException(
1238                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1239                     row, column, getRowDimension(), getColumnDimension());
1240         }
1241     }
1242 
1243     /** {@inheritDoc} */
1244     @Override
1245     public void addToEntry(final int row, final int column, final double increment)
1246         throws MatrixIndexException {
1247         try {
1248             final int iBlock = row    / BLOCK_SIZE;
1249             final int jBlock = column / BLOCK_SIZE;
1250             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1251                                (column - jBlock * BLOCK_SIZE);
1252             blocks[iBlock * blockColumns + jBlock][k] += increment;
1253         } catch (ArrayIndexOutOfBoundsException e) {
1254             throw new MatrixIndexException(
1255                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1256                     row, column, getRowDimension(), getColumnDimension());
1257         }
1258     }
1259 
1260     /** {@inheritDoc} */
1261     @Override
1262     public void multiplyEntry(final int row, final int column, final double factor)
1263         throws MatrixIndexException {
1264         try {
1265             final int iBlock = row    / BLOCK_SIZE;
1266             final int jBlock = column / BLOCK_SIZE;
1267             final int k      = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1268                                (column - jBlock * BLOCK_SIZE);
1269             blocks[iBlock * blockColumns + jBlock][k] *= factor;
1270         } catch (ArrayIndexOutOfBoundsException e) {
1271             throw new MatrixIndexException(
1272                     "no entry at indices ({0}, {1}) in a {2}x{3} matrix",
1273                     row, column, getRowDimension(), getColumnDimension());
1274         }
1275     }
1276 
1277     /** {@inheritDoc} */
1278     @Override
1279     public BlockRealMatrix transpose() {
1280 
1281         final int nRows = getRowDimension();
1282         final int nCols = getColumnDimension();
1283         final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);
1284 
1285         // perform transpose block-wise, to ensure good cache behavior
1286         int blockIndex = 0;
1287         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1288             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1289 
1290                 // transpose current block
1291                 final double[] outBlock = out.blocks[blockIndex];
1292                 final double[] tBlock   = blocks[jBlock * blockColumns + iBlock];
1293                 final int      pStart   = iBlock * BLOCK_SIZE;
1294                 final int      pEnd     = Math.min(pStart + BLOCK_SIZE, columns);
1295                 final int      qStart   = jBlock * BLOCK_SIZE;
1296                 final int      qEnd     = Math.min(qStart + BLOCK_SIZE, rows);
1297                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1298                     final int lInc = pEnd - pStart;
1299                     for (int q = qStart, l = p - pStart; q < qEnd; ++q, l+= lInc) {
1300                         outBlock[k++] = tBlock[l];
1301                     }
1302                 }
1303 
1304                 // go to next block
1305                 ++blockIndex;
1306 
1307             }
1308         }
1309 
1310         return out;
1311 
1312     }
1313 
1314     /** {@inheritDoc} */
1315     @Override
1316     public int getRowDimension() {
1317         return rows;
1318     }
1319 
1320     /** {@inheritDoc} */
1321     @Override
1322     public int getColumnDimension() {
1323         return columns;
1324     }
1325 
1326     /** {@inheritDoc} */
1327     @Override
1328     public double[] operate(final double[] v)
1329         throws IllegalArgumentException {
1330 
1331         if (v.length != columns) {
1332             throw MathRuntimeException.createIllegalArgumentException(
1333                     "vector length mismatch: got {0} but expected {1}",
1334                     v.length, columns);
1335         }
1336         final double[] out = new double[rows];
1337 
1338         // perform multiplication block-wise, to ensure good cache behavior
1339         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1340             final int pStart = iBlock * BLOCK_SIZE;
1341             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1342             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1343                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1344                 final int      qStart = jBlock * BLOCK_SIZE;
1345                 final int      qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1346                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1347                     double sum = 0;
1348                     int q = qStart;
1349                     while (q < qEnd - 3) {
1350                         sum += block[k]     * v[q]     +
1351                                block[k + 1] * v[q + 1] +
1352                                block[k + 2] * v[q + 2] +
1353                                block[k + 3] * v[q + 3];
1354                         k += 4;
1355                         q += 4;
1356                     }
1357                     while (q < qEnd) {
1358                         sum += block[k++] * v[q++];
1359                     }
1360                     out[p] += sum;
1361                 }
1362             }
1363         }
1364 
1365         return out;
1366 
1367     }
1368 
1369     /** {@inheritDoc} */
1370     @Override
1371     public double[] preMultiply(final double[] v)
1372         throws IllegalArgumentException {
1373 
1374         if (v.length != rows) {
1375             throw MathRuntimeException.createIllegalArgumentException(
1376                     "vector length mismatch: got {0} but expected {1}",
1377                     v.length, rows);
1378         }
1379         final double[] out = new double[columns];
1380 
1381         // perform multiplication block-wise, to ensure good cache behavior
1382         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1383             final int jWidth  = blockWidth(jBlock);
1384             final int jWidth2 = jWidth  + jWidth;
1385             final int jWidth3 = jWidth2 + jWidth;
1386             final int jWidth4 = jWidth3 + jWidth;
1387             final int qStart = jBlock * BLOCK_SIZE;
1388             final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1389             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1390                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1391                 final int      pStart = iBlock * BLOCK_SIZE;
1392                 final int      pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1393                 for (int q = qStart; q < qEnd; ++q) {
1394                     int k = q - qStart;
1395                     double sum = 0;
1396                     int p = pStart;
1397                     while (p < pEnd - 3) {
1398                         sum += block[k]           * v[p]     +
1399                                block[k + jWidth]  * v[p + 1] +
1400                                block[k + jWidth2] * v[p + 2] +
1401                                block[k + jWidth3] * v[p + 3];
1402                         k += jWidth4;
1403                         p += 4;
1404                     }
1405                     while (p < pEnd) {
1406                         sum += block[k] * v[p++];
1407                         k += jWidth;
1408                     }
1409                     out[q] += sum;
1410                 }
1411             }
1412         }
1413 
1414         return out;
1415 
1416     }
1417 
1418     /** {@inheritDoc} */
1419     @Override
1420     public double walkInRowOrder(final RealMatrixChangingVisitor visitor)
1421         throws MatrixVisitorException {
1422         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1423         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1424             final int pStart = iBlock * BLOCK_SIZE;
1425             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1426             for (int p = pStart; p < pEnd; ++p) {
1427                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1428                     final int jWidth = blockWidth(jBlock);
1429                     final int qStart = jBlock * BLOCK_SIZE;
1430                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1431                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1432                     for (int q = qStart, k = (p - pStart) * jWidth; q < qEnd; ++q, ++k) {
1433                         block[k] = visitor.visit(p, q, block[k]);
1434                     }
1435                 }
1436              }
1437         }
1438         return visitor.end();
1439     }
1440 
1441     /** {@inheritDoc} */
1442     @Override
1443     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor)
1444         throws MatrixVisitorException {
1445         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1446         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1447             final int pStart = iBlock * BLOCK_SIZE;
1448             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1449             for (int p = pStart; p < pEnd; ++p) {
1450                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1451                     final int jWidth = blockWidth(jBlock);
1452                     final int qStart = jBlock * BLOCK_SIZE;
1453                     final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1454                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1455                     for (int q = qStart, k = (p - pStart) * jWidth; q < qEnd; ++q, ++k) {
1456                         visitor.visit(p, q, block[k]);
1457                     }
1458                 }
1459              }
1460         }
1461         return visitor.end();
1462     }
1463 
1464     /** {@inheritDoc} */
1465     @Override
1466     public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
1467                                  final int startRow, final int endRow,
1468                                  final int startColumn, final int endColumn)
1469         throws MatrixIndexException, MatrixVisitorException {
1470         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1471         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1472         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1473             final int p0     = iBlock * BLOCK_SIZE;
1474             final int pStart = Math.max(startRow, p0);
1475             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1476             for (int p = pStart; p < pEnd; ++p) {
1477                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1478                     final int jWidth = blockWidth(jBlock);
1479                     final int q0     = jBlock * BLOCK_SIZE;
1480                     final int qStart = Math.max(startColumn, q0);
1481                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1482                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1483                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1484                         block[k] = visitor.visit(p, q, block[k]);
1485                     }
1486                 }
1487              }
1488         }
1489         return visitor.end();
1490     }
1491 
1492     /** {@inheritDoc} */
1493     @Override
1494     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
1495                                  final int startRow, final int endRow,
1496                                  final int startColumn, final int endColumn)
1497         throws MatrixIndexException, MatrixVisitorException {
1498         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1499         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1500         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1501             final int p0     = iBlock * BLOCK_SIZE;
1502             final int pStart = Math.max(startRow, p0);
1503             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1504             for (int p = pStart; p < pEnd; ++p) {
1505                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1506                     final int jWidth = blockWidth(jBlock);
1507                     final int q0     = jBlock * BLOCK_SIZE;
1508                     final int qStart = Math.max(startColumn, q0);
1509                     final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1510                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1511                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1512                         visitor.visit(p, q, block[k]);
1513                     }
1514                 }
1515              }
1516         }
1517         return visitor.end();
1518     }
1519 
1520     /** {@inheritDoc} */
1521     @Override
1522     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor)
1523         throws MatrixVisitorException {
1524         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1525         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
1526             final int pStart = iBlock * BLOCK_SIZE;
1527             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1528             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
1529                 final int qStart = jBlock * BLOCK_SIZE;
1530                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1531                 final double[] block = blocks[blockIndex];
1532                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1533                     for (int q = qStart; q < qEnd; ++q, ++k) {
1534                         block[k] = visitor.visit(p, q, block[k]);
1535                     }
1536                 }
1537             }
1538         }
1539         return visitor.end();
1540     }
1541 
1542     /** {@inheritDoc} */
1543     @Override
1544     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor)
1545         throws MatrixVisitorException {
1546         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1547         for (int iBlock = 0, blockIndex = 0; iBlock < blockRows; ++iBlock) {
1548             final int pStart = iBlock * BLOCK_SIZE;
1549             final int pEnd   = Math.min(pStart + BLOCK_SIZE, rows);
1550             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++blockIndex) {
1551                 final int qStart = jBlock * BLOCK_SIZE;
1552                 final int qEnd   = Math.min(qStart + BLOCK_SIZE, columns);
1553                 final double[] block = blocks[blockIndex];
1554                 for (int p = pStart, k = 0; p < pEnd; ++p) {
1555                     for (int q = qStart; q < qEnd; ++q, ++k) {
1556                         visitor.visit(p, q, block[k]);
1557                     }
1558                 }
1559             }
1560         }
1561         return visitor.end();
1562     }
1563 
1564     /** {@inheritDoc} */
1565     @Override
1566     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
1567                                        final int startRow, final int endRow,
1568                                        final int startColumn, final int endColumn)
1569         throws MatrixIndexException, MatrixVisitorException {
1570         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1571         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1572         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1573             final int p0     = iBlock * BLOCK_SIZE;
1574             final int pStart = Math.max(startRow, p0);
1575             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1576             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1577                 final int jWidth = blockWidth(jBlock);
1578                 final int q0     = jBlock * BLOCK_SIZE;
1579                 final int qStart = Math.max(startColumn, q0);
1580                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1581                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1582                 for (int p = pStart; p < pEnd; ++p) {
1583                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1584                         block[k] = visitor.visit(p, q, block[k]);
1585                     }
1586                 }
1587             }
1588         }
1589         return visitor.end();
1590     }
1591 
1592     /** {@inheritDoc} */
1593     @Override
1594     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
1595                                        final int startRow, final int endRow,
1596                                        final int startColumn, final int endColumn)
1597         throws MatrixIndexException, MatrixVisitorException {
1598         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1599         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1600         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1601             final int p0     = iBlock * BLOCK_SIZE;
1602             final int pStart = Math.max(startRow, p0);
1603             final int pEnd   = Math.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1604             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1605                 final int jWidth = blockWidth(jBlock);
1606                 final int q0     = jBlock * BLOCK_SIZE;
1607                 final int qStart = Math.max(startColumn, q0);
1608                 final int qEnd   = Math.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1609                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1610                 for (int p = pStart; p < pEnd; ++p) {
1611                     for (int q = qStart, k = (p - p0) * jWidth + qStart - q0; q < qEnd; ++q, ++k) {
1612                         visitor.visit(p, q, block[k]);
1613                     }
1614                 }
1615             }
1616         }
1617         return visitor.end();
1618     }
1619 
1620     /**
1621      * Get the height of a block.
1622      * @param blockRow row index (in block sense) of the block
1623      * @return height (number of rows) of the block
1624      */
1625     private int blockHeight(final int blockRow) {
1626         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1627     }
1628 
1629     /**
1630      * Get the width of a block.
1631      * @param blockColumn column index (in block sense) of the block
1632      * @return width (number of columns) of the block
1633      */
1634     private int blockWidth(final int blockColumn) {
1635         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1636     }
1637 
1638 }