CXXR (C++ R) API
Subscripting.hpp
Go to the documentation of this file.
1 /*CXXR $Id: Subscripting.hpp 1348 2013-02-25 17:49:03Z arr $
2  *CXXR
3  *CXXR This file is part of CXXR, a project to refactor the R interpreter
4  *CXXR into C++. It may consist in whole or in part of program code and
5  *CXXR documentation taken from the R project itself, incorporated into
6  *CXXR CXXR (and possibly MODIFIED) under the terms of the GNU General Public
7  *CXXR Licence.
8  *CXXR
9  *CXXR CXXR is Copyright (C) 2008-13 Andrew R. Runnalls, subject to such other
10  *CXXR copyrights and copyright restrictions as may be stated below.
11  *CXXR
12  *CXXR CXXR is not part of the R project, and bugs and other issues should
13  *CXXR not be reported via r-bugs or other R project channels; instead refer
14  *CXXR to the CXXR website.
15  *CXXR */
16 
17 /*
18  * R : A Computer Language for Statistical Data Analysis
19  *
20  * This program is free software; you can redistribute it and/or modify
21  * it under the terms of the GNU General Public License as published by
22  * the Free Software Foundation; either version 2.1 of the License, or
23  * (at your option) any later version.
24  *
25  * This program is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  * GNU Lesser General Public License for more details.
29  *
30  * You should have received a copy of the GNU General Public License
31  * along with this program; if not, a copy is available at
32  * http://www.r-project.org/Licenses/
33  */
34 
41 #ifndef SUBSCRIPTING_HPP
42 #define SUBSCRIPTING_HPP 1
43 
44 #include "CXXR/GCStackRoot.hpp"
45 #include "CXXR/IntVector.h"
46 #include "CXXR/ListVector.h"
47 #include "CXXR/PairList.h"
48 #include "CXXR/StringVector.h"
49 #include "CXXR/Symbol.h"
50 
51 namespace CXXR {
85  class Subscripting {
86  public:
129  template <class VL, class VR>
130  static VL* arraySubassign(VL* lhs, const ListVector* indices,
131  const VR* rhs);
132 
176  template <class VL, class VR>
177  static VL* arraySubassign(VL* lhs, const PairList* subscripts,
178  const VR* rhs)
179  {
182  subscripts));
183  return arraySubassign(lhs, indices, rhs);
184  }
185 
215  template <class V>
216  static V* arraySubset(const V* v, const ListVector* indices,
217  bool drop);
218 
244  template <class V>
245  static V* arraySubset(const V* v, const PairList* subscripts,
246  bool drop)
247  {
250  subscripts));
251  return arraySubset(v, indices, drop);
252  }
253 
279  static std::pair<const IntVector*, std::size_t>
280  canonicalize(const IntVector* raw_indices, std::size_t range_size);
281 
318  static std::pair<const IntVector*, std::size_t>
319  canonicalize(const LogicalVector* raw_indices, std::size_t range_size);
320 
346  static std::pair<const IntVector*, std::size_t>
347  canonicalize(const RObject* raw_indices, std::size_t range_size,
348  const StringVector* range_names);
349 
397  static std::pair<const IntVector*, std::size_t>
398  canonicalize(const StringVector* raw_indices, std::size_t range_size,
399  const StringVector* range_names);
400 
423  static const ListVector*
425  const PairList* subscripts);
426 
454  static bool dropDimensions(VectorBase* v);
455 
497  template <class VL, class VR>
498  static VL* subassign(VL* lhs, const PairList* subscripts,
499  const VR* rhs);
500 
527  template <class V>
528  static V* subset(const V* v, const PairList* subscripts, bool drop);
529 
575  template <class VL, class VR>
576  static VL* vectorSubassign(VL* lhs,
577  const std::pair<const IntVector*,
578  std::size_t>& indices_pr,
579  const VR* rhs);
580 
624  template <class VL, class VR>
625  static VL* vectorSubassign(VL* lhs, const RObject* subscripts,
626  const VR* rhs)
627  {
628  const std::pair<const IntVector*, std::size_t>
629  pr(canonicalize(subscripts, lhs->size(), lhs->names()));
630  GCStackRoot<const IntVector> iv(pr.first);
631  return vectorSubassign(lhs, pr, rhs);
632  }
633 
657  template <class V>
658  static V* vectorSubset(const V* v, const IntVector* indices);
659 
676  template <class V>
677  static V* vectorSubset(const V* v, const RObject* subscripts)
678  {
680  indices(canonicalize(subscripts, v->size(), v->names()).first);
681  return vectorSubset(v, indices);
682  }
683  private:
684  // Data structure used in subsetting arrays, containing
685  // information relating to a particular dimension.
686  struct DimIndexer {
687  unsigned int nindices; // Number of index values to be
688  // extracted along this dimension.
689  const IntVector* indices; // Pointer to array containing the index
690  // values themselves. The index values count from 1.
691  unsigned int indexnum; // Position (counting from 0) of
692  // the index within 'indices' currently being processed.
693  unsigned int stride; // Number of elements (within the
694  // linear layout of the source array) separating
695  // consecutive elements along this dimension.
696  };
697 
698  typedef std::vector<DimIndexer, Allocator<DimIndexer> > DimIndexerVector;
699 
700  // Not implemented. Declared private to prevent the
701  // inadvertent creation of Subscripting objects.
702  Subscripting();
703 
704  // Non-templated auxiliary function for arraySubset(), used to
705  // initialise the vector of DimIndexers. The function returns
706  // the required size of the output vector.
707  static std::size_t createDimIndexers(DimIndexerVector* dimindexers,
708  const IntVector* source_dims,
709  const ListVector* indices);
710 
711  // If 'indices' has a 'use.names' attribute, use this to
712  // update the 'names' attribute of 'v'.
713  static void processUseNames(VectorBase* v, const IntVector* indices);
714 
715  // Non-templated auxiliary function for arraySubset(), used to
716  // set the attributes on the result.
717  static void setArrayAttributes(VectorBase* subset,
718  const VectorBase* source,
719  const DimIndexerVector& dimindexers,
720  bool drop);
721 
741  static void setVectorAttributes(VectorBase* subset,
742  const VectorBase* source,
743  const IntVector* indices);
744  }; // class Subscripting
745 
746  template <class VL, class VR>
747  VL* Subscripting::arraySubassign(VL* lhs, const ListVector* indices,
748  const VR* rhs)
749  {
750  typedef typename VL::value_type Lval;
751  typedef typename VR::value_type Rval;
752  const IntVector* vdims = lhs->dimensions();
753  std::size_t ndims = vdims->size();
754  DimIndexerVector dimindexer(ndims);
755  std::size_t ni = createDimIndexers(&dimindexer, vdims, indices);
756  std::size_t rhs_size = rhs->size();
757  if (rhs_size > 1) {
758  // TODO: Move NA-detection back into the canonicalisation
759  // process.
760  for (unsigned int d = 0; d < ndims; ++d) {
761  DimIndexer& di = dimindexer[d];
762  for (unsigned int i = 0; i < di.nindices; ++i)
763  if (isNA((*di.indices)[i]))
764  Rf_error(_("NA subscripts are not allowed"
765  " in this context"));
766  }
767  }
768  GCStackRoot<VL> ans(lhs);
769  // If necessary, make a copy to be sure we don't modify rhs.
770  // (FIXME: ideally this should be a shallow copy for
771  // HandleVectors.)
772  const VectorBase* ansvb = static_cast<VectorBase*>(ans.get());
773  if (ansvb == rhs)
774  ans = CXXR_NEW(VL(*ans.get()));
775  // Dispose of 'no indices' case:
776  if (ni == 0)
777  return ans;
778  if (rhs_size == 0)
779  Rf_error(_("replacement has length zero"));
780  if (ni%rhs_size != 0)
781  Rf_warning(_("number of items to replace is not"
782  " a multiple of replacement length"));
783  // Copy elements across:
784  for (unsigned int irhs = 0; irhs < ni; ++irhs) {
785  bool naindex = false;
786  unsigned int iout = 0;
787  for (unsigned int d = 0; d < ndims; ++d) {
788  const DimIndexer& di = dimindexer[d];
789  int index = (*di.indices)[di.indexnum];
790  if (isNA(index)) {
791  naindex = true;
792  break;
793  }
794  iout += (index - 1)*di.stride;
795  }
796  if (!naindex) {
797  // Be careful not to create a temporary RHandle.
798  Lval& lval = (*ans)[iout];
799  const Rval& rval = (*rhs)[irhs % rhs_size];
800  if (isNA(rval))
801  lval = NA<Lval>();
802  else
803  lval = rval;
804  }
805  // Advance the index selection:
806  {
807  unsigned int d = 0;
808  bool done;
809  do {
810  done = true;
811  DimIndexer& di = dimindexer[d];
812  if (++di.indexnum >= di.nindices) {
813  di.indexnum = 0;
814  done = (++d >= ndims);
815  }
816  } while (!done);
817  }
818  }
819  return ans;
820  }
821 
822 
823  template <class V>
824  V* Subscripting::arraySubset(const V* v, const ListVector* indices,
825  bool drop)
826  {
827  const IntVector* vdims = v->dimensions();
828  std::size_t ndims = vdims->size();
829  DimIndexerVector dimindexer(ndims);
830  std::size_t resultsize = createDimIndexers(&dimindexer, vdims, indices);
831  GCStackRoot<V> result(CXXR_NEW(V(resultsize)));
832  // Copy elements across:
833  {
834  // ***** FIXME ***** Currently needed because Handle's
835  // assignment operator takes a non-const RHS:
836  V* vnc = const_cast<V*>(v);
837  for (unsigned int iout = 0; iout < resultsize; ++iout) {
838  bool naindex = false;
839  unsigned int iin = 0;
840  for (unsigned int d = 0; d < ndims; ++d) {
841  const DimIndexer& di = dimindexer[d];
842  int index = (*di.indices)[di.indexnum];
843  if (isNA(index)) {
844  naindex = true;
845  break;
846  }
847  iin += (index - 1)*di.stride;
848  }
849  (*result)[iout]
850  = (naindex ? NA<typename V::value_type>()
851  : (*vnc)[iin]);
852  // Advance the index selection:
853  {
854  unsigned int d = 0;
855  bool done;
856  do {
857  done = true;
858  DimIndexer& di = dimindexer[d];
859  if (++di.indexnum >= di.nindices) {
860  di.indexnum = 0;
861  done = (++d >= ndims);
862  }
863  } while (!done);
864  }
865  }
866  }
867  setArrayAttributes(result, v, dimindexer, drop);
868  return result;
869  }
870 
871  template <class VL, class VR>
872  VL* Subscripting::subassign(VL* lhs, const PairList* subscripts,
873  const VR* rhs)
874  {
875  unsigned int nsubs = listLength(subscripts);
876  if (nsubs > 1)
877  return arraySubassign(lhs, subscripts, rhs);
878  const IntVector* dims = lhs->dimensions();
879  if (dims && dims->size() == nsubs)
880  return arraySubassign(lhs, subscripts, rhs);
881  return vectorSubassign(lhs, (subscripts ? subscripts->car()
882  : Symbol::missingArgument()), rhs);
883  }
884 
885  template <class V>
886  V* Subscripting::subset(const V* v, const PairList* subscripts, bool drop)
887  {
888  unsigned int nsubs = listLength(subscripts);
889  if (nsubs > 1)
890  return arraySubset(v, subscripts, drop);
891  const IntVector* dims = v->dimensions();
892  if (dims && dims->size() == nsubs)
893  return arraySubset(v, subscripts, drop);
894  return vectorSubset(v, (subscripts ? subscripts->car()
896  }
897 
898  template <class VL, class VR>
900  const std::pair<const IntVector*,
901  std::size_t>& indices_pr,
902  const VR* rhs)
903  {
904  typedef typename VL::value_type Lval;
905  typedef typename VR::value_type Rval;
906  const IntVector* indices = indices_pr.first;
907  std::size_t newsize = indices_pr.second;
908  std::size_t ni = indices->size();
909  std::size_t rhs_size = rhs->size();
910  if (rhs_size > 1) {
911  // TODO: Move NA-detection back into the canonicalisation
912  // process.
913  for (unsigned int i = 0; i < ni; ++i)
914  if (isNA((*indices)[i]))
915  Rf_error(_("NA subscripts are not allowed"
916  " in this context"));
917  }
918  GCStackRoot<VL> ans(lhs);
919  if (newsize > lhs->size())
920  ans = VectorBase::resize(lhs, newsize);
921  // If necessary, make a copy to be sure we don't modify rhs or
922  // indices. (FIXME: ideally this should be a shallow copy for
923  // HandleVectors.)
924  const VectorBase* ansvb = static_cast<VectorBase*>(ans.get());
925  if (ansvb == rhs || ansvb == indices)
926  ans = CXXR_NEW(VL(*ans.get()));
927  // Dispose of 'no indices' case:
928  if (ni == 0)
929  return ans;
930  if (rhs_size == 0)
931  Rf_error(_("replacement has length zero"));
932  if (ni%rhs_size != 0)
933  Rf_warning(_("number of items to replace is not"
934  " a multiple of replacement length"));
935  for (unsigned int i = 0; i < ni; ++i) {
936  int index = (*indices)[i];
937  if (!isNA(index)) {
938  // Be careful not to create a temporary RHandle.
939  Lval& lval = (*ans)[index - 1];
940  const Rval& rval = (*rhs)[i % rhs_size];
941  if (isNA(rval))
942  lval = NA<Lval>();
943  else
944  lval = rval;
945  }
946  }
947  processUseNames(ans, indices);
948  return ans;
949  }
950 
951  template <class V>
952  V* Subscripting::vectorSubset(const V* v, const IntVector* indices)
953  {
954  std::size_t ni = indices->size();
955  GCStackRoot<V> ans(CXXR_NEW(V(ni)));
956  std::size_t vsize = v->size();
957  // ***** FIXME ***** Currently needed because Handle's
958  // assignment operator takes a non-const RHS:
959  V* vnc = const_cast<V*>(v);
960  for (unsigned int i = 0; i < ni; ++i) {
961  int index = (*indices)[i];
962  // Note that zero and negative indices ought not to occur.
963  if (isNA(index) || index > int(vsize))
964  (*ans)[i] = NA<typename V::value_type>();
965  else (*ans)[i] = (*vnc)[index - 1];
966  }
967  setVectorAttributes(ans, v, indices);
968  return ans;
969  }
970 } // namespace CXXR;
971 
972 #endif // SUBSCRIPTING_HPP