[prediction] GeneralPredictionEngine initial commit (CoOccurenceMiner). 21/54921/2
author Marcin Masternak <m.masternak@samsung.com>
Mon, 21 Dec 2015 10:11:59 +0000 (11:11 +0100)
committer Marcin Masternak <m.masternak@samsung.com>
Mon, 21 Dec 2015 10:11:59 +0000 (11:11 +0100)
Change-Id: I1693bcdec7cf6769d5de38c06c5f20640d305115
Signed-off-by: Marcin Masternak <m.masternak@samsung.com>
39 files changed:
AUTHORS
CMakeLists.txt
packaging/statistics-context-provider.spec
src/prediction/assoc_rule.cpp [new file with mode: 0644]
src/prediction/assoc_rule.h [new file with mode: 0644]
src/prediction/assoc_rule_miner.cpp [new file with mode: 0644]
src/prediction/assoc_rule_miner.h [new file with mode: 0644]
src/prediction/assoc_rule_of_ids.h [new file with mode: 0644]
src/prediction/assoc_rule_producer.cpp [new file with mode: 0644]
src/prediction/assoc_rule_producer.h [new file with mode: 0644]
src/prediction/basket.cpp [new file with mode: 0644]
src/prediction/basket.h [new file with mode: 0644]
src/prediction/basket_compressor.cpp [new file with mode: 0644]
src/prediction/basket_compressor.h [new file with mode: 0644]
src/prediction/basket_filter.cpp [new file with mode: 0644]
src/prediction/basket_filter.h [new file with mode: 0644]
src/prediction/basket_producer.cpp [new file with mode: 0644]
src/prediction/basket_producer.h [new file with mode: 0644]
src/prediction/baskets_agregator.cpp [new file with mode: 0644]
src/prediction/baskets_agregator.h [new file with mode: 0644]
src/prediction/event.cpp [new file with mode: 0644]
src/prediction/event.h [new file with mode: 0644]
src/prediction/event_set.h [new file with mode: 0644]
src/prediction/i_item_id_filter.h [new file with mode: 0644]
src/prediction/interval.cpp [new file with mode: 0644]
src/prediction/interval.h [new file with mode: 0644]
src/prediction/item.cpp [new file with mode: 0644]
src/prediction/item.h [new file with mode: 0644]
src/prediction/item_catalogue.cpp [new file with mode: 0644]
src/prediction/item_catalogue.h [new file with mode: 0644]
src/prediction/item_id_set.h [new file with mode: 0644]
src/prediction/item_set.cpp [new file with mode: 0644]
src/prediction/item_set.h [new file with mode: 0644]
src/prediction/item_string_converter.cpp [new file with mode: 0644]
src/prediction/item_string_converter.h [new file with mode: 0644]
src/prediction/single_category_item_id_filter.cpp [new file with mode: 0644]
src/prediction/single_category_item_id_filter.h [new file with mode: 0644]
src/prediction/weight_apriori.cpp [new file with mode: 0644]
src/prediction/weight_apriori.h [new file with mode: 0644]

diff --git a/AUTHORS b/AUTHORS
index 23ed658..b158d64 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,5 +1,10 @@
-Kyoung-Mook Choi <kmook.choi@samsung.com>
-Sanggun Lee      <sanggun7.lee@samsung.com>
-Ickhee Woo       <ickhee.woo@samsung.com>
-Myungho Lee      <milk.lee@samsung.com>
-Mu-Woong Lee     <muwoong.lee@samsung.com>
+Kyoung-Mook Choi   <kmook.choi@samsung.com>
+Sanggun Lee        <sanggun7.lee@samsung.com>
+Ickhee Woo         <ickhee.woo@samsung.com>
+Myungho Lee        <milk.lee@samsung.com>
+Mu-Woong Lee       <muwoong.lee@samsung.com>
+Bozena Lukasiak    <b.lukasiak@samsung.com>
+Marcin Masternak   <m.masternak@samsung.com>
+Piotr Czarnecki    <p.czarnecki@samsung.com>
+Tomasz Toczyski    <t.toczyski@samsung.com>
+Witold Chmielowiec <w.chmielowie@samsung.com>
\ No newline at end of file
index ed887dc..7319923 100644 (file)
@@ -9,6 +9,7 @@ SET(target_provider "ctx-stats-provider")
 FILE(GLOB SRCS src/*.cpp)
 FILE(GLOB SRCS ${SRCS} src/shared/*.cpp)
 FILE(GLOB SRCS ${SRCS} src/app/*.cpp)
+FILE(GLOB SRCS ${SRCS} src/prediction/*.cpp)
 
 # Dependencies
 SET(provider_deps "context-common capi-system-runtime-info pkgmgr pkgmgr-info capi-appfw-package-manager")
index 3fef186..cbb2187 100644 (file)
@@ -50,6 +50,7 @@ export CXXFLAGS+=" -Wextra -Wcast-align -Wcast-qual -Wshadow -Wwrite-strings -Ws
 
 export   CFLAGS+=" -Wno-unused-parameter -Wno-empty-body"
 export CXXFLAGS+=" -Wno-unused-parameter -Wno-empty-body"
+export CXXFLAGS+=" -std=c++11"
 
 export   CFLAGS+=" -fno-omit-frame-pointer -fno-optimize-sibling-calls -fno-strict-aliasing -fno-unroll-loops -fsigned-char -fstrict-overflow -fno-common"
 export CXXFLAGS+=" -fno-omit-frame-pointer -fno-optimize-sibling-calls -fno-strict-aliasing -fno-unroll-loops -fsigned-char -fstrict-overflow"
diff --git a/src/prediction/assoc_rule.cpp b/src/prediction/assoc_rule.cpp
new file mode 100644 (file)
index 0000000..05be246
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assoc_rule.h"
+
+std::ostream& ctx::operator<<(std::ostream &out, const ctx::AssocRule &rule)
+{       /* Writes "<antecedent> => <consequent>; support:<s>; confidence:<c>" to out. */
+       out << rule.antecedent << " => " << rule.consequent;
+       return out << "; support:" << rule.support << "; confidence:" << rule.confidence;
+}
+
+bool ctx::operator==(const ctx::AssocRule& left, const ctx::AssocRule& right)
+{
+       return  left.antecedent == right.antecedent &&
+                       left.consequent == right.consequent &&
+                       left.support == right.support &&
+                       left.confidence == right.confidence;
+}
diff --git a/src/prediction/assoc_rule.h b/src/prediction/assoc_rule.h
new file mode 100644 (file)
index 0000000..252684e
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_ASSOC_RULE_H_
+#define INCLUDE_ASSOC_RULE_H_
+
+#include "item_set.h"
+#include <iostream>
+
+namespace ctx {
+
+       struct AssocRule {      /* association rule "antecedent => consequent" expressed with items */
+               ItemSet antecedent;     /* left-hand side of the rule */
+               ItemSet consequent;     /* right-hand side of the rule */
+               double support;         /* stored exactly as passed to the constructor */
+               double confidence;      /* stored exactly as passed to the constructor */
+
+               AssocRule(const ItemSet& antecedent_,
+                               const ItemSet& consequent_,
+                               const double& support_,
+                               const double& confidence_)
+                               : antecedent(antecedent_)
+                               , consequent(consequent_)
+                               , support(support_)
+                               , confidence(confidence_) {}
+       };
+
+       bool operator==(const AssocRule& left, const AssocRule& right);
+
+       std::ostream& operator<<(std::ostream &out, const AssocRule &rule);
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_ASSOC_RULE_H_ */
diff --git a/src/prediction/assoc_rule_miner.cpp b/src/prediction/assoc_rule_miner.cpp
new file mode 100644 (file)
index 0000000..a639af8
--- /dev/null
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assoc_rule_miner.h"
+#include "single_category_item_id_filter.h"
+#include "basket_filter.h"
+#include "basket_compressor.h"
+#include "baskets_agregator.h"
+#include "assoc_rule_producer.h"
+#include "weight_apriori.h"
+
+std::list<ctx::AssocRule> ctx::AssocRuleMiner::mine_from_baskets(
+               std::list<ctx::Basket>& initBaskets,
+               ctx::ItemCatalogue& itemCatalogue,
+               double minSupport,
+               double minConfidence,
+               std::string consequentCategory)
+{
+       SingleCategoryItemIdFilter itemIdFilter(consequentCategory, itemCatalogue);
+       BasketFilter::filter_in_place(initBaskets,itemIdFilter);
+
+       std::list<Basket> compressedBaskets = BasketCompressor::compress(initBaskets);
+
+       BasketsAgregator basketsAgregator(compressedBaskets);
+       basketsAgregator.generate_bitsets(itemCatalogue.maxId());
+       auto freqItemSets = WeightApriori::find_frequent_itemid_sets(basketsAgregator, minSupport);
+       auto rulesOfIds = AssocRuleProducer::generate_rules(freqItemSets, itemIdFilter, minConfidence);
+       return remove_assoc_rule_ids(rulesOfIds, itemCatalogue);
+}
+
+std::list<ctx::AssocRule> ctx::AssocRuleMiner::mine_from_events(
+               const ctx::EventSet& events,
+               double minSupport,
+               double minConfidence,
+               std::string consequentCategory)
+{
+       ItemCatalogue itemCatalogue;
+
+       BasketProducer basketProducer(itemCatalogue);
+       for (auto& event : events) {
+               basketProducer.put_event(event);
+       }
+
+       std::list<Basket> initBaskets = basketProducer.make_baskets();
+
+       return mine_from_baskets(initBaskets, itemCatalogue, minSupport, minConfidence, consequentCategory);
+}
+
+std::list<ctx::AssocRule> ctx::AssocRuleMiner::remove_assoc_rule_ids(
+               const std::list<ctx::AssocRuleOfIds>& rulesOfIds,
+               const ctx::ItemCatalogue& itemCatalogue)
+{
+       std::list<AssocRule> result;
+       for (const AssocRuleOfIds ruleOfIds : rulesOfIds) {
+               AssocRule rule(remove_assoc_rule_ids(ruleOfIds.antecedent, itemCatalogue),
+                                       remove_assoc_rule_ids(ruleOfIds.consequent, itemCatalogue),
+                                       ruleOfIds.support,
+                                       ruleOfIds.confidence);
+               result.push_back(rule);
+       }
+       return result;
+}
+
+ctx::ItemSet ctx::AssocRuleMiner::remove_assoc_rule_ids(const ctx::ItemIdSet& itemIdSet, const ctx::ItemCatalogue& itemCatalogue)
+{
+       /* Looks every id up in the catalogue, keeping the order of itemIdSet. */
+       ItemSet items;
+       for (const int itemId : itemIdSet)
+               items.push_back(itemCatalogue.item_of_id(itemId));
+       return items;
+}
diff --git a/src/prediction/assoc_rule_miner.h b/src/prediction/assoc_rule_miner.h
new file mode 100644 (file)
index 0000000..6d57e3f
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ASSOC_RULE_MINER_H_
+#define ASSOC_RULE_MINER_H_
+
+#include <list>
+#include <string>
+#include "event_set.h"
+#include "assoc_rule.h"
+#include "assoc_rule_of_ids.h"
+#include "basket_producer.h"
+#include "i_item_id_filter.h"
+
+namespace ctx {
+
+       class AssocRuleMiner {
+               /* Static entry points that mine association rules from events or from ready-made baskets. */
+       public:
+               static std::list<AssocRule> mine_from_events(   /* builds baskets from the events, then calls mine_from_baskets() */
+                               const EventSet& events,
+                               double minSupport,
+                               double minConfidence,
+                               std::string consequentCategory);
+               static std::list<AssocRule> mine_from_baskets(  /* filter, compress, find frequent sets, produce rules */
+                               std::list<Basket>& initBaskets,         /* modified: filtered in place */
+                               ItemCatalogue& itemCatalogue,
+                               double minSupport,                      /* threshold handed to WeightApriori */
+                               double minConfidence,                   /* threshold handed to AssocRuleProducer */
+                               std::string consequentCategory);        /* used to build the item id filter */
+       private:
+               static std::list<AssocRule> remove_assoc_rule_ids(const std::list<AssocRuleOfIds>& rulesOfIds, const ItemCatalogue& itemCatalogue);    /* id-based rules -> item-based rules */
+               static ItemSet remove_assoc_rule_ids(const ItemIdSet& itemIdSet, const ItemCatalogue& itemCatalogue);  /* ids -> items via the catalogue */
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* ASSOC_RULE_MINER_H_ */
diff --git a/src/prediction/assoc_rule_of_ids.h b/src/prediction/assoc_rule_of_ids.h
new file mode 100644 (file)
index 0000000..b7dea9a
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ASSOC_RULE_OF_IDS_H_
+#define ASSOC_RULE_OF_IDS_H_
+
+#include "item_id_set.h"
+
+namespace ctx {
+
+       /* Rule of the form antecedent => consequent, expressed with item ids. */
+       class AssocRuleOfIds {
+               /* No constructor is declared: a default-initialized object has indeterminate support/confidence. */
+       public:
+               ItemIdSet antecedent;   /* ids on the left-hand side */
+               ItemIdSet consequent;   /* ids on the right-hand side */
+               double support;         /* must be assigned by the code that builds the rule */
+               double confidence;      /* must be assigned by the code that builds the rule */
+       };
+
+       inline bool operator==(const AssocRuleOfIds& left, const AssocRuleOfIds& right)
+       {
+               return  left.support == right.support
+                        && left.confidence == right.confidence
+                        && left.antecedent == right.antecedent
+                        && left.consequent == right.consequent;
+       }
+
+}      /* namespace ctx */
+
+#endif /* ASSOC_RULE_OF_IDS_H_ */
diff --git a/src/prediction/assoc_rule_producer.cpp b/src/prediction/assoc_rule_producer.cpp
new file mode 100644 (file)
index 0000000..da9f5d2
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assoc_rule_producer.h"
+
+ctx::AssocRuleOfIds ctx::AssocRuleProducer::rule_template(const ctx::ItemIdSet& sourceItemIdSet,
+               const ctx::IItemIdFilter& itemIdFilter)
+{
+       AssocRuleOfIds ruleTempl;
+       for (int itemId : sourceItemIdSet) {
+               ItemIdSet &destination = itemIdFilter.pass(itemId)
+                               ? ruleTempl.consequent
+                               : ruleTempl.antecedent;
+               destination.push_back(itemId);
+       }
+       return ruleTempl;
+}
+
+std::list<ctx::AssocRuleOfIds> ctx::AssocRuleProducer::generate_rules(
+               const std::list<std::pair<ctx::ItemIdSet, double>>& freqItemIdSets,
+               const ctx::IItemIdFilter& itemIdFilter, double minConfidence)
+{
+       std::list<AssocRuleOfIds> rules;
+
+       for (auto & pair : freqItemIdSets) {
+               ItemIdSet freqItemIdSet = pair.first;
+               double support = pair.second;
+               AssocRuleOfIds rule = AssocRuleProducer::rule_template(freqItemIdSet, itemIdFilter);
+               if (rule.antecedent.size() > 0 && rule.consequent.size() > 0) {
+                       rule.support = support;
+                       double antecedentSupport = AssocRuleProducer::supportOf(freqItemIdSets, rule.antecedent);
+                       rule.confidence = support / antecedentSupport;
+                       if (rule.confidence >= minConfidence) {
+                               rules.push_back(rule);
+                       }
+               }
+       }
+       return rules;
+}
+
+double ctx::AssocRuleProducer::supportOf(
+               const std::list<std::pair<ctx::ItemIdSet, double>> &allFreqItemIdSets, const ctx::ItemIdSet &wanted)
+{
+       for (auto & pair : allFreqItemIdSets) {
+               if (pair.first == wanted) {
+                       return pair.second;
+               }
+       }
+       return 0.0;
+}
diff --git a/src/prediction/assoc_rule_producer.h b/src/prediction/assoc_rule_producer.h
new file mode 100644 (file)
index 0000000..03eaecf
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ASSOC_RULE_PRODUCER_H_
+#define ASSOC_RULE_PRODUCER_H_
+
+#include "assoc_rule_of_ids.h"
+#include "basket.h"
+#include "i_item_id_filter.h"
+#include <list>
+
+namespace ctx {
+
+       class AssocRuleProducer {
+               /* Turns frequent item id sets (each paired with its support) into association rules of ids. */
+       public:
+               static std::list<AssocRuleOfIds> generate_rules(
+                               const std::list<std::pair<ItemIdSet,double>> &freqItemIdSets,
+                               const IItemIdFilter &itemIdFilter,
+                               double minConfidence);
+               /* NOTE: despite the parameter name below, the definition puts the ids that PASS the filter into the consequent. */
+               static AssocRuleOfIds rule_template(const ItemIdSet &sourceItemIdSet, const IItemIdFilter &antecedentItemIdFilter);
+
+       private:
+               static double supportOf(const std::list<std::pair<ItemIdSet,double>> &allFreqItemIdSets, const ItemIdSet &wanted);      /* 0.0 when 'wanted' is not listed */
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* ASSOC_RULE_PRODUCER_H_ */
diff --git a/src/prediction/basket.cpp b/src/prediction/basket.cpp
new file mode 100644 (file)
index 0000000..35357ce
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "basket.h"
+
+void ctx::Basket::compute_bitset(int maxId)
+{
+       /* Rebuild from scratch: unlike resize(), assign() also clears bits left by an earlier call. */
+       mBitSet.assign(maxId + 1, false);
+       for (int itemId : itemIdSet)    /* precondition: 0 <= itemId <= maxId */
+               mBitSet[itemId] = true;
+}
+
+bool ctx::Basket::includes(const ctx::ItemIdSet& potentialSubset)
+{       /* True when every id of potentialSubset is marked in the bitset built by compute_bitset(). */
+       for (int itemId : potentialSubset) {
+               /* An id outside the bitset (negative, too large, bitset not computed) cannot be present. */
+               const bool inRange = itemId >= 0 && static_cast<std::vector<bool>::size_type>(itemId) < mBitSet.size();
+               if (!inRange || !mBitSet[itemId]) return false;
+       }
+       return true;
+}
diff --git a/src/prediction/basket.h b/src/prediction/basket.h
new file mode 100644 (file)
index 0000000..2b40eb6
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_BASKET_H_
+#define INCLUDE_BASKET_H_
+
+#include "item_id_set.h"
+#include <vector>
+
+namespace ctx {
+
+       class Basket {
+               /* A set of item ids together with an integer weight. */
+       public:
+               ItemIdSet itemIdSet;    /* ids of the items in this basket */
+               int weight;             /* weight of the basket in support computations */
+
+               Basket(const ItemIdSet& itemIdSet_, const int& weight_) : itemIdSet(itemIdSet_), weight(weight_) {};
+               void compute_bitset(int maxId); // marks the ids of itemIdSet in mBitSet, sized maxId + 1
+               bool includes(const ItemIdSet& potentialSubset); // compute_bitset() should be invoked first
+
+       private:
+               std::vector<bool> mBitSet;      /* mBitSet[id] is true when id belongs to itemIdSet; empty until compute_bitset() */
+
+       };
+
+       // "inline" is required: the function is defined in a header, so without it every .cpp file including the header would emit its own definition (multiple-definition link error).
+       inline bool operator==(const Basket& left, const Basket& right)
+       {
+               return left.weight == right.weight && left.itemIdSet == right.itemIdSet;
+       }
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_BASKET_H_ */
diff --git a/src/prediction/basket_compressor.cpp b/src/prediction/basket_compressor.cpp
new file mode 100644 (file)
index 0000000..f46c40d
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "basket_compressor.h"
+#include <sstream>
+#include <map>
+
+std::string ctx::BasketCompressor::itemset_string(const ctx::ItemIdSet& itemIdSet)
+{
+       /* Key of the form "id1,id2,...,idN," - every id is followed by a comma. */
+       std::stringstream key;
+       for (const int id : itemIdSet)
+               key << id << ',';
+       return key.str();
+}
+
+std::list<ctx::Basket> ctx::BasketCompressor::compress(const std::list<ctx::Basket>& inputBaskets)
+{
+       std::map<std::string,Basket> basketsMap;
+       // TODO: maybe it is worth to resign from string keys and use std::map<ItemSet,weight> instead.
+
+       for (const Basket& basket : inputBaskets) {
+               std::string itemSetStr = itemset_string(basket.itemIdSet);
+
+               auto findResult = basketsMap.find(itemSetStr);
+               if (findResult == basketsMap.end()) {
+                       basketsMap.insert(std::pair<std::string,Basket>(itemSetStr, basket));
+               } else {
+                       Basket& compressedBasket = findResult->second;
+                       compressedBasket.weight += basket.weight;
+               }
+       }
+
+       std::list<Basket> compressedBaskets;
+       for (auto pair : basketsMap) {
+               compressedBaskets.push_back(pair.second);
+       }
+       return compressedBaskets;
+}
diff --git a/src/prediction/basket_compressor.h b/src/prediction/basket_compressor.h
new file mode 100644 (file)
index 0000000..a0f5a95
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BASKET_COMPRESSOR_H_
+#define BASKET_COMPRESSOR_H_
+
+#include <list>
+#include <string>
+#include "basket.h"
+
+namespace ctx {
+
+       class BasketCompressor {
+               /* Merges baskets that have the same item id sequence into single baskets with summed weights. */
+       public:
+               static std::list<Basket> compress(const std::list<Basket> &inputBaskets);       /* the input list is not modified */
+
+       private:
+               static std::string itemset_string(const ItemIdSet &itemIdSet);  /* comma-terminated ids, used as the map key */
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* BASKET_COMPRESSOR_H_ */
diff --git a/src/prediction/basket_filter.cpp b/src/prediction/basket_filter.cpp
new file mode 100644 (file)
index 0000000..da03b6c
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "basket_filter.h"
+
+void ctx::BasketFilter::filter_in_place(std::list<ctx::Basket>& baskets, const ctx::IItemIdFilter& itemIdFilter)
+{       /* Erases every basket for which BasketPass() reports false. */
+       baskets.remove_if([&itemIdFilter] (const Basket &candidate) -> bool { return BasketPass(candidate, itemIdFilter) == false; });
+}
+
+bool ctx::BasketFilter::BasketPass(const ctx::Basket& basket, const ctx::IItemIdFilter& itemIdFilter)
+{
+       for (int itemId : basket.itemIdSet) {
+               if (itemIdFilter.pass(itemId)) {
+                       return true;
+               }
+       }
+       return false;
+}
diff --git a/src/prediction/basket_filter.h b/src/prediction/basket_filter.h
new file mode 100644 (file)
index 0000000..18a03fd
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BASKET_FILTER_H_
+#define BASKET_FILTER_H_
+
+#include "basket.h"
+#include "i_item_id_filter.h"
+#include <list>
+
+namespace ctx {
+
+       class BasketFilter {
+               /* Removes baskets that contain no item id accepted by a given filter. */
+       public:
+               static void filter_in_place(std::list<Basket> &inputBaskets, const IItemIdFilter &itemIdFilter);        /* modifies inputBaskets */
+
+       private:
+               static bool BasketPass(const Basket &basket, const IItemIdFilter &itemIdFilter);        /* true if any id of the basket passes */
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* BASKET_FILTER_H_ */
diff --git a/src/prediction/basket_producer.cpp b/src/prediction/basket_producer.cpp
new file mode 100644 (file)
index 0000000..99a3af9
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "basket_producer.h"
+#include <set>
+
+void ctx::BasketProducer::put_event(const ctx::Event& event)
+{
+       const int itemId = mItemCatalogue.id_of(event.item);    /* presumably > 0, since the sign encodes start/end - confirm */
+       put_change(event.interval.start, +itemId);      /* positive id: the item starts */
+       put_change(event.interval.end, -itemId);        /* negated id: the item ends */
+}
+
+std::list<ctx::Basket> ctx::BasketProducer::make_baskets()
+{
+       std::list<Basket> baskets;
+
+       time_t intervalStart;
+       std::set<int> currentItemIds;
+
+       bool first = true;
+       for (auto changePair : mChanges) {
+               if (first) {
+                       first = false;
+               } else {
+                       time_t intervalEnd = changePair.first;
+                       int weight = intervalEnd - intervalStart;
+                       ItemIdSet itemIdSet;
+                       for (int itemId : currentItemIds) {
+                               itemIdSet.push_back(itemId);
+                       }
+                       baskets.push_back(Basket(itemIdSet, weight));
+               }
+
+               for (int itemId : changePair.second) {
+                       if (itemId > 0) {  // item with itemId starts now
+                               currentItemIds.insert(itemId);
+                       } else {           // item with -itemId ends now
+                               currentItemIds.erase(-itemId);
+                       }
+               }
+
+               intervalStart = changePair.first;
+       }
+
+       return baskets;
+}
+
+void ctx::BasketProducer::put_change(time_t time, int value)
+{
+       /* Appends 'value' to the changes recorded at 'time'. */
+       /* operator[] creates an empty list for a timestamp seen for the first time, */
+       /* so a single lookup replaces the former find() plus two operator[] calls. */
+       mChanges[time].push_back(value);
+}
diff --git a/src/prediction/basket_producer.h b/src/prediction/basket_producer.h
new file mode 100644 (file)
index 0000000..c265d94
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_BASKET_PRODUCER_H_
+#define INCLUDE_BASKET_PRODUCER_H_
+
+#include "event.h"
+#include "basket.h"
+#include "item_catalogue.h"
+#include <list>
+
+namespace ctx {
+
+       class BasketProducer {
+               /* Collects events and turns them into weighted baskets of item ids. */
+       public:
+               BasketProducer(ItemCatalogue& itemCatalogue) : mItemCatalogue(itemCatalogue) {};
+               void put_event(const Event& e);         /* records the start and the end of the event's interval */
+               std::list<Basket> make_baskets();       /* one basket per span between consecutive change timestamps */
+
+       private:
+               void put_change(time_t time, int value);
+
+               ItemCatalogue& mItemCatalogue;  /* reference member: the catalogue must outlive this object */
+
+               /*
+                * Changes in timestamps.
+                * If an item starts then its id is stored.
+                * If an item ends then its negated id is stored.
+                */
+               std::map<time_t, std::list<int>> mChanges;      /* NOTE(review): <map> is not included by this header itself - confirm it arrives via another include */
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_BASKET_PRODUCER_H_ */
diff --git a/src/prediction/baskets_agregator.cpp b/src/prediction/baskets_agregator.cpp
new file mode 100644 (file)
index 0000000..7a57e9b
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "baskets_agregator.h"
+
+/* Binds the aggregator to a caller-owned basket list; only the reference is stored. */
+ctx::BasketsAgregator::BasketsAgregator(std::list<ctx::Basket>& baskets)
+       : mBaskets(baskets)
+{
+}
+
+ctx::ItemIdSet ctx::BasketsAgregator::unique_item_ids()
+{
+       ItemIdSet allItemIds;
+       for (const Basket& basket : mBaskets) {
+               for (int itemId : basket.itemIdSet) {
+                       allItemIds.push_back(itemId);
+               }
+       }
+       allItemIds.sort();
+       allItemIds.unique();
+       return allItemIds;
+}
+
+/* Asks every basket to compute its bitset, passing maxId through unchanged. */
+void ctx::BasketsAgregator::generate_bitsets(int maxId)
+{
+       for (auto basketIt = mBaskets.begin(); basketIt != mBaskets.end(); ++basketIt) {
+               basketIt->compute_bitset(maxId);
+       }
+}
+
+double ctx::BasketsAgregator::supportOf(const ctx::ItemIdSet& itemIdSet)
+{
+       int matchedCount = 0;
+       int unmatchedCount = 0;
+       for (Basket& basket : mBaskets) {
+               if (basket.includes(itemIdSet)) {
+                       matchedCount += basket.weight;
+               } else {
+                       unmatchedCount += basket.weight;
+               }
+       }
+       return (double) matchedCount / (double) (matchedCount + unmatchedCount);
+}
diff --git a/src/prediction/baskets_agregator.h b/src/prediction/baskets_agregator.h
new file mode 100644 (file)
index 0000000..e3093fe
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_BASKETS_AGREGATOR_H_
+#define INCLUDE_BASKETS_AGREGATOR_H_
+
+#include <list>
+#include <memory>
+#include "basket.h"
+
+namespace ctx {
+
+       /* Computes aggregate information over a caller-owned list of baskets. */
+       class BasketsAgregator {
+
+       public:
+               /* Only a reference to the list is stored, so the list must outlive this object. */
+               BasketsAgregator(std::list<Basket>& baskets);
+               /* Calls compute_bitset(maxId) on every basket. */
+               void generate_bitsets(int maxId);
+               /* Returns the sorted, duplicate-free ids of the items found in the baskets. */
+               ItemIdSet unique_item_ids();
+               /* Returns the weight of the baskets including itemIdSet divided by the weight of all baskets. */
+               double supportOf(const ItemIdSet &itemIdSet);
+
+       private:
+               std::list<Basket>& mBaskets;
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_BASKETS_AGREGATOR_H_ */
diff --git a/src/prediction/event.cpp b/src/prediction/event.cpp
new file mode 100644 (file)
index 0000000..817e108
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "event.h"
+
+/* Two events are equal when both their items and their intervals are equal. */
+bool ctx::operator==(const ctx::Event& left, const ctx::Event& right)
+{
+       if (!(left.item == right.item)) {
+               return false;
+       }
+       return left.interval == right.interval;
+}
+
+/* Negation of the equality operator for events. */
+bool ctx::operator!=(const ctx::Event& left, const ctx::Event& right)
+{
+       const bool same = (left == right);
+       return !same;
+}
diff --git a/src/prediction/event.h b/src/prediction/event.h
new file mode 100644 (file)
index 0000000..029296f
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_EVENT_H_
+#define INCLUDE_EVENT_H_
+
+#include "item.h"
+#include "interval.h"
+
+namespace ctx {
+
+       /* An item together with the time interval it is associated with. */
+       struct Event {
+               Item item;
+               Interval interval;
+
+               /* long time event */
+               Event(const Item& item_, const Interval& interval_) : item(item_), interval(interval_) {}
+
+               /* instant event: stored as the interval built from start_time_ and start_time_ + 1 */
+               Event(const Item& item_, const time_t& start_time_) : item(item_), interval(start_time_, start_time_ + 1) {}
+       };
+
+       /* Events compare equal when both the items and the intervals compare equal. */
+       bool operator==(const Event& left, const Event& right);
+       bool operator!=(const Event& left, const Event& right);
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_EVENT_H_ */
diff --git a/src/prediction/event_set.h b/src/prediction/event_set.h
new file mode 100644 (file)
index 0000000..dda0bc2
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_EVENT_SET_H_
+#define INCLUDE_EVENT_SET_H_
+
+#include "event.h"
+#include <list>
+
+namespace ctx {
+
+       /* A collection of events, kept in a std::list. */
+       typedef std::list<Event> EventSet;
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_EVENT_SET_H_ */
diff --git a/src/prediction/i_item_id_filter.h b/src/prediction/i_item_id_filter.h
new file mode 100644 (file)
index 0000000..dd4c386
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef I_ITEM_ID_FILTER_H_
+#define I_ITEM_ID_FILTER_H_
+
+namespace ctx {
+
+       /* Interface of a predicate deciding about item ids. */
+       class IItemIdFilter {
+
+       public:
+               /* Virtual so that implementations can be destroyed through this interface. */
+               virtual ~IItemIdFilter() {}
+               /* Returns true when the given item id passes the filter. */
+               virtual bool pass(int itemId) const = 0;
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* I_ITEM_ID_FILTER_H_ */
diff --git a/src/prediction/interval.cpp b/src/prediction/interval.cpp
new file mode 100644 (file)
index 0000000..b6dc8f9
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interval.h"
+
+/* Two intervals are equal when both their start and their end match. */
+bool ctx::operator==(const Interval& left, const Interval& right)
+{
+       if (left.start != right.start) {
+               return false;
+       }
+       return left.end == right.end;
+}
+
+/* Negation of the equality operator for intervals. */
+bool ctx::operator!=(const Interval& left, const Interval& right)
+{
+       const bool same = (left == right);
+       return !same;
+}
diff --git a/src/prediction/interval.h b/src/prediction/interval.h
new file mode 100644 (file)
index 0000000..2153aa0
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_INTERVAL_H_
+#define INCLUDE_INTERVAL_H_
+
+#include <ctime>
+
+namespace ctx {
+
+       /* A pair of time points: start and end. */
+       struct Interval {
+               time_t start;
+               time_t end;
+
+               /* Stores both values as given; no ordering check is made here. */
+               Interval(time_t start_, time_t end_) : start(start_), end(end_) {}
+       };
+
+       /* Intervals compare equal when both start and end are equal. */
+       bool operator==(const Interval& left, const Interval& right);
+       bool operator!=(const Interval& left, const Interval& right);
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_INTERVAL_H_ */
diff --git a/src/prediction/item.cpp b/src/prediction/item.cpp
new file mode 100644 (file)
index 0000000..9a80ca9
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "item.h"
+
+/*
+ * Parses an item from its "[Category]Value" textual form.
+ * The category is the text between the first '[' and the first ']' that
+ * follows it; the value is everything after that ']'.
+ * Malformed input (missing brackets) is not rejected here; callers are
+ * expected to pass well-formed strings.
+ */
+ctx::Item::Item(const std::string &s)
+{
+       size_t leftBracket = s.find('[');
+       size_t rightBracket = s.find(']', leftBracket);
+       /* substr() takes a length, not an end position. */
+       category = s.substr(leftBracket + 1, rightBracket - leftBracket - 1);
+       value = s.substr(rightBracket + 1);
+}
+
+/* Items are equal when both category and value are equal. */
+bool ctx::Item::operator==(const ctx::Item& other) const
+{
+       if (!(other.category == category)) {
+               return false;
+       }
+       return other.value == value;
+}
+
+/* Negation of the equality operator for items. */
+bool ctx::Item::operator!=(const ctx::Item& other) const
+{
+       return !(*this == other);
+}
+
+/* Writes the item in the "[Category]Value" form and returns the stream. */
+std::ostream& ctx::operator<<(std::ostream& out, const ctx::Item& item)
+{
+       out << '[' << item.category << ']';
+       out << item.value;
+       return out;
+}
diff --git a/src/prediction/item.h b/src/prediction/item.h
new file mode 100644 (file)
index 0000000..edf7d28
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_ITEM_H_
+#define INCLUDE_ITEM_H_
+
+#include <string>
+#include <iostream>
+
+namespace ctx {
+
+       /* An item described by a category and a value, both plain strings. */
+       struct Item {
+               std::string category;
+               std::string value;
+
+               /* Builds the item from separately given category and value. */
+               Item(const std::string &category_, const std::string &value_) : category(category_), value(value_) {}
+               /* Not explicit, so a std::string converts to an Item implicitly. */
+               Item(const std::string &s); // in the form of "[Category]Value"
+
+               /* Items are equal when both category and value are equal. */
+               bool operator==(const Item& other) const;
+               bool operator!=(const Item& other) const;
+       };
+
+       /* Writes the item as "[Category]Value". */
+       std::ostream& operator<<(std::ostream& out, const Item& item);
+
+}      /* namespace ctx */
+
+namespace std {
+
+       template <> struct hash<ctx::Item> {
+               size_t operator()(const ctx::Item &item) const {
+                       hash<std::string> hasher;
+                       return hasher(item.category + ":" + item.value);
+               }
+       };
+
+}
+
+#endif /* INCLUDE_ITEM_H_ */
diff --git a/src/prediction/item_catalogue.cpp b/src/prediction/item_catalogue.cpp
new file mode 100644 (file)
index 0000000..666b5a8
--- /dev/null
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "item_catalogue.h"
+#include "item_string_converter.h"
+
+int ctx::ItemCatalogue::id_of(const ctx::Item& item)
+{
+       std::string s = ItemStringConverter::item_to_string(item);
+       std::map<std::string,int>::iterator it = mItemIdsOfStrings.find(s);
+       if (it == mItemIdsOfStrings.end()) { // new item
+               mMaxId++;
+               mItemIdsOfStrings[s] = mMaxId;
+               mItemStrings.push_back(s);
+               extendCategoryItemIds(item.category, mMaxId);
+               return mMaxId;
+       } else { // existing item
+               return mItemIdsOfStrings[s];
+       }
+}
+
+/* Tells whether id lies within the range of ids handed out so far (1..mMaxId). */
+bool ctx::ItemCatalogue::exists_item_of_id(int id) const
+{
+       if (id <= 0) {
+               return false;
+       }
+       return id <= mMaxId;
+}
+
+/*
+ * Returns the item registered under the given id.
+ * For an id outside the range handed out so far an item with empty
+ * category and value is returned instead of indexing out of bounds.
+ */
+ctx::Item ctx::ItemCatalogue::item_of_id(int id) const
+{
+       if (!exists_item_of_id(id)) {
+               return Item("", "");
+       }
+       return ItemStringConverter::string_to_item(mItemStrings[id]);
+}
+
+/* Returns a copy of the ids registered under category; empty when the category is unknown. */
+std::set<int> ctx::ItemCatalogue::category_item_ids(std::string category) const
+{
+       auto found = mCategoryItemIds.find(category);
+       if (found != mCategoryItemIds.end()) {
+               return found->second;
+       }
+       return std::set<int>();
+}
+
+/* Adds itemId to the id set of category, creating the set on first use. */
+void ctx::ItemCatalogue::extendCategoryItemIds(std::string category, int itemId)
+{
+       /* operator[] creates an empty set for a category not seen before. */
+       std::set<int>& idsOfCategory = mCategoryItemIds[category];
+       idsOfCategory.insert(itemId);
+}
diff --git a/src/prediction/item_catalogue.h b/src/prediction/item_catalogue.h
new file mode 100644 (file)
index 0000000..96b5279
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_ITEMCATALOGUE_H_
+#define INCLUDE_ITEMCATALOGUE_H_
+
+#include "item.h"
+#include <map>
+#include <set>
+#include <vector>
+
+namespace ctx {
+
+       /* Assigns integer ids to items and translates between items and ids. */
+       class ItemCatalogue {
+
+       public:
+               /* Index 0 of mItemStrings is filled with an empty placeholder, so real ids start at 1. */
+               ItemCatalogue() : mMaxId(0) {
+                       mItemStrings.push_back(std::string(""));
+               }
+
+               /* Returns the id of the item, registering the item when it is seen for the first time. */
+               int id_of(Item const &item);
+
+               /* True when id is greater than 0 and not greater than the maximal id. */
+               bool exists_item_of_id(int id) const;
+               /* Returns the item whose string is stored at index id. */
+               Item item_of_id(int id) const;
+               /* Returns the ids of the items registered under the category (empty when unknown). */
+               std::set<int> category_item_ids(std::string category) const;
+               int maxId() const {return mMaxId;};
+
+       private:
+               /* Translates item string to its id */
+               std::map<std::string,int> mItemIdsOfStrings;
+
+               /* Translates category string to the set of ids of its items */
+               std::map<std::string,std::set<int>> mCategoryItemIds;
+
+               /* Stores item strings (index is item's id) */
+               std::vector<std::string> mItemStrings;
+
+               /* Preserves maximal item's id */
+               int mMaxId;
+
+               /* Inserts itemId into the id set of the category. */
+               void extendCategoryItemIds(std::string category, int itemId);
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_ITEMCATALOGUE_H_ */
diff --git a/src/prediction/item_id_set.h b/src/prediction/item_id_set.h
new file mode 100644 (file)
index 0000000..0604fed
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_ITEM_ID_SET_H_
+#define INCLUDE_ITEM_ID_SET_H_
+
+#include <list>
+
+namespace ctx {
+
+       /*
+        * A set storing item ids.
+        * NOTE: backed by std::list, so uniqueness and ordering are not enforced by the type itself.
+        */
+       typedef std::list<int> ItemIdSet;
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_ITEM_ID_SET_H_ */
diff --git a/src/prediction/item_set.cpp b/src/prediction/item_set.cpp
new file mode 100644 (file)
index 0000000..c8dc55b
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "item_set.h"
+#include <algorithm>
+
+std::ostream& ctx::operator<<(std::ostream& out, const ctx::ItemSet& itemSet)
+{
+       bool first = true;
+       for (auto& item : itemSet) {
+               if (first) {
+                       first = false;
+               } else {
+                       out << ", ";
+               }
+               out << item;
+       }
+       return out;
+}
+
+/* Returns true when every item of 'small' can be found in 'big'. */
+bool ctx::itemset_includes_in(const ctx::ItemSet& small, const ctx::ItemSet& big)
+{
+       return std::all_of(small.begin(), small.end(), [&big](const Item& wanted) {
+               return std::find(big.begin(), big.end(), wanted) != big.end();
+       });
+}
diff --git a/src/prediction/item_set.h b/src/prediction/item_set.h
new file mode 100644 (file)
index 0000000..98a1b6d
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_ITEM_SET_H_
+#define INCLUDE_ITEM_SET_H_
+
+#include <list>
+#include "item.h"
+
+namespace ctx {
+
+       /* A set storing items (backed by std::list) */
+       typedef std::list<Item> ItemSet;
+
+       /* Writes the items to the stream, separated by ", ". */
+       std::ostream& operator<<(std::ostream& out, const ItemSet& itemSet);
+
+       /* Returns true when every item of 'small' is also found in 'big'. */
+       bool itemset_includes_in(const ItemSet& small, const ItemSet& big);
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_ITEM_SET_H_ */
diff --git a/src/prediction/item_string_converter.cpp b/src/prediction/item_string_converter.cpp
new file mode 100644 (file)
index 0000000..399847a
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "item_string_converter.h"
+#include <string>
+
+/* Joins category and value with SEPARATOR between them. */
+std::string ctx::ItemStringConverter::item_to_string(const ctx::Item &item)
+{
+       std::string joined(item.category);
+       joined += SEPARATOR;
+       joined += item.value;
+       return joined;
+}
+
+/*
+ * Splits the string at the first SEPARATOR: the part before it becomes the
+ * category and the part after it becomes the value.
+ */
+ctx::Item ctx::ItemStringConverter::string_to_item(const std::string& s)
+{
+       const size_t separatorPos = s.find(SEPARATOR);
+       /* NOTE: a missing separator is not checked; the assert below is disabled. */
+       //assert(separatorPos != std::string::npos);
+       const std::string category = s.substr(0, separatorPos);
+       const std::string value = s.substr(separatorPos + 1);
+       return Item(category, value);
+}
diff --git a/src/prediction/item_string_converter.h b/src/prediction/item_string_converter.h
new file mode 100644 (file)
index 0000000..961c1f3
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_ITEMSTRINGCONVERTER_H_
+#define INCLUDE_ITEMSTRINGCONVERTER_H_
+
+#include "item.h"
+
+namespace ctx {
+
+       /* Converts items to and from a single "category:value" string. */
+       class ItemStringConverter {
+
+       public:
+               /* Character placed between category and value in the string form. */
+               static const char SEPARATOR = ':';
+               /* Returns category + SEPARATOR + value. */
+               static std::string item_to_string(const Item &item);
+               /* Splits the string at the first SEPARATOR into category and value. */
+               static Item string_to_item(const std::string& s);
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_ITEMSTRINGCONVERTER_H_ */
diff --git a/src/prediction/single_category_item_id_filter.cpp b/src/prediction/single_category_item_id_filter.cpp
new file mode 100644 (file)
index 0000000..6646e8d
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "single_category_item_id_filter.h"
+
+/* Takes a copy of the ids that the catalogue holds for the category at construction time. */
+ctx::SingleCategoryItemIdFilter::SingleCategoryItemIdFilter(std::string category, const ctx::ItemCatalogue &itemCatalogue)
+       : mAcceptedItemIds(itemCatalogue.category_item_ids(category))
+{
+}
+
+/* Accepts exactly the ids collected for the category in the constructor. */
+bool ctx::SingleCategoryItemIdFilter::pass(int itemId) const
+{
+       return mAcceptedItemIds.count(itemId) > 0;
+}
diff --git a/src/prediction/single_category_item_id_filter.h b/src/prediction/single_category_item_id_filter.h
new file mode 100644 (file)
index 0000000..4ef9cf4
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SINGLE_CATEGORY_ITEM_ID_FILTER_H_
+#define SINGLE_CATEGORY_ITEM_ID_FILTER_H_
+
+#include "i_item_id_filter.h"
+#include "item_catalogue.h"
+
+namespace ctx {
+
+       /* Filter passing only the ids of items that belong to one category. */
+       class SingleCategoryItemIdFilter : public IItemIdFilter {
+
+       public:
+               /* Copies the ids of the category from the catalogue; later catalogue changes are not seen. */
+               SingleCategoryItemIdFilter(std::string category, const ItemCatalogue& itemCatalogue);
+               /* Returns true when itemId is among the copied ids. */
+               bool pass(int itemId) const override;
+
+       private:
+               /* Ids accepted by pass(). */
+               std::set<int> mAcceptedItemIds;
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* SINGLE_CATEGORY_ITEM_ID_FILTER_H_ */
diff --git a/src/prediction/weight_apriori.cpp b/src/prediction/weight_apriori.cpp
new file mode 100644 (file)
index 0000000..92aaebc
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "weight_apriori.h"
+#include <algorithm>
+#include <iostream>
+
+std::list<ctx::ItemIdSet> ctx::WeightApriori::find_wider_candidates(const std::list<ctx::ItemIdSet> &narrowFreqSets, int widerSize)
+{
+       std::list<ItemIdSet> candidates;
+       for (auto iter1 = narrowFreqSets.begin(); iter1 != narrowFreqSets.end(); ++iter1) {
+               auto iter2 = iter1; ++iter2;
+               for (;iter2 != narrowFreqSets.end(); ++iter2) {
+                       ItemIdSet itemIdSet1 = *iter1;
+                       ItemIdSet itemIdSet2 = *iter2;
+                       itemIdSet1.merge(itemIdSet2);
+                       itemIdSet1.unique();
+                       if (int(itemIdSet1.size()) == widerSize) {
+                               candidates.push_back(itemIdSet1);
+                       }
+               }
+       }
+       candidates.sort();
+       candidates.unique();
+       return candidates;
+}
+
+void ctx::WeightApriori::prune_wider_candidates(std::list<ctx::ItemIdSet>& widerCandidates, const std::list<ctx::ItemIdSet>& narrowFreqSets)
+{
+       std::list<ItemIdSet> pruned;
+       for (auto iter = widerCandidates.begin(); iter != widerCandidates.end();) {
+               if (survives_prunning(*iter, narrowFreqSets)) {
+                       ++iter;
+               } else {
+                       iter = widerCandidates.erase(iter);
+               }
+       }
+}
+
+/*
+ * Checks whether every subset of widerCandidate obtained by leaving out
+ * exactly one element is present in narrowFreqSets.
+ * The subsets are produced in place: one element is kept aside and, at each
+ * step, swapped with the element at the current position.
+ * widerCandidate must not be empty (front() is called unconditionally).
+ * On a true result the original contents are restored; on a false result
+ * widerCandidate is left modified.
+ */
+bool ctx::WeightApriori::survives_prunning(ctx::ItemIdSet& widerCandidate, const std::list<ctx::ItemIdSet>& narrowFreqSets)
+{
+       /* Start with the subset that lacks the first element. */
+       int removedItemId = widerCandidate.front();
+       widerCandidate.pop_front();
+       for (auto iter = widerCandidate.begin();; ++iter) {
+               if (std::find(narrowFreqSets.begin(), narrowFreqSets.end(), widerCandidate) == narrowFreqSets.end()) {
+                       return false; // Caution: In this case the original contents of widerCandidate is not preserved.
+               }
+               if (iter == widerCandidate.end()) {
+                       widerCandidate.push_back(removedItemId); // Restoring original contents of widerCandidate.
+                       return true;
+               }
+               /* Put the kept-aside element back at this position and keep aside the one that was here. */
+               int newRemovedItemId = *iter;
+               *iter = removedItemId;
+               removedItemId = newRemovedItemId;
+       }
+}
+
+std::list<std::pair<ctx::ItemIdSet, double>> ctx::WeightApriori::find_frequent_itemid_sets(ctx::BasketsAgregator &basketAgregator, double supportThreshold)
+{
+       std::list<std::pair<ItemIdSet, double>> allFreqItemsets;
+       std::list<ItemIdSet> freqItemsetsOfCurrSize;
+       std::list<ItemIdSet> candidates;
+
+       int candidateSize = 1;
+       candidates = single_size_candidates(basketAgregator);
+
+       while (!candidates.empty()) {
+               for (ItemIdSet candidate : candidates) {
+                       double support = basketAgregator.supportOf(candidate);
+                       if (support >= supportThreshold) {
+                               freqItemsetsOfCurrSize.push_back(candidate);
+                               allFreqItemsets.push_back(std::make_pair(candidate,support));
+                       }
+               }
+               candidates = find_wider_candidates(freqItemsetsOfCurrSize, ++candidateSize);
+               freqItemsetsOfCurrSize.clear();
+       }
+       return allFreqItemsets;
+}
+
+/* Produces one single-element candidate set for every unique item id of the baskets. */
+std::list<ctx::ItemIdSet> ctx::WeightApriori::single_size_candidates(ctx::BasketsAgregator& basketAgregator)
+{
+       std::list<ItemIdSet> singles;
+       const ItemIdSet uniqueIds = basketAgregator.unique_item_ids();
+       for (auto idIt = uniqueIds.begin(); idIt != uniqueIds.end(); ++idIt) {
+               ItemIdSet single;
+               single.push_back(*idIt);
+               singles.push_back(single);
+       }
+       return singles;
+}
diff --git a/src/prediction/weight_apriori.h b/src/prediction/weight_apriori.h
new file mode 100644 (file)
index 0000000..94aecf2
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_WEIGHT_APRIORI_H_
+#define INCLUDE_WEIGHT_APRIORI_H_
+
+#include "baskets_agregator.h"
+#include <list>
+#include <utility>
+
+namespace ctx {
+
+       /* Static helpers searching for frequent item id sets in weighted baskets. */
+       class WeightApriori {
+
+       public:
+               /* Merges pairs of the narrower sets and returns the distinct unions having widerSize elements. */
+               static std::list<ItemIdSet> find_wider_candidates(const std::list<ItemIdSet> &narrowFreqSets, int widerSize);
+               /* Erases from widerCandidates the candidates that do not survive prunning. */
+               static void prune_wider_candidates(std::list<ItemIdSet> &widerCandidates, const std::list<ItemIdSet> &narrowFreqSets);
+
+               /*
+                * True when every subset of widerCandidate lacking one element is in narrowFreqSets.
+                * Caution: widerCandidate is left modified when false is returned.
+                */
+               static bool survives_prunning(ItemIdSet &widerCandidate, const std::list<ItemIdSet> &narrowFreqSets);
+
+               /* Find frequent itemsets along with its support value. */
+               static std::list<std::pair<ItemIdSet,double>> find_frequent_itemid_sets(BasketsAgregator &basketAgregator, double supportThreshold);
+
+       private:
+               /* Returns one single-element set per unique item id of the baskets. */
+               static std::list<ItemIdSet> single_size_candidates(BasketsAgregator &basketAgregator);
+
+       };
+
+}      /* namespace ctx */
+
+#endif /* INCLUDE_WEIGHT_APRIORI_H_ */