From d4919df1179b15333c7e627590130a2ccfb68e2b Mon Sep 17 00:00:00 2001
From: Junru Shao <junrushao1994@gmail.com>
Date: Tue, 15 Sep 2020 16:36:10 -0700
Subject: [PATCH] [Minor] Fix typos in Ansor (#6425)

---
 include/tvm/auto_scheduler/compute_dag.h           | 12 +++---
 include/tvm/auto_scheduler/feature.h               |  4 +-
 include/tvm/auto_scheduler/loop_state.h            |  2 +-
 include/tvm/auto_scheduler/measure.h               | 16 ++++----
 python/tvm/auto_scheduler/cost_model/cost_model.py |  8 ++--
 python/tvm/auto_scheduler/cost_model/xgb_model.py  | 29 --------------
 python/tvm/auto_scheduler/feature.py               |  2 +-
 python/tvm/auto_scheduler/loop_state.py            | 46 ----------------------
 python/tvm/auto_scheduler/measure.py               |  6 +--
 python/tvm/auto_scheduler/measure_record.py        |  4 +-
 python/tvm/auto_scheduler/search_policy.py         |  6 +--
 python/tvm/auto_scheduler/utils.py                 |  2 +-
 python/tvm/auto_scheduler/workload_registry.py     |  2 +-
 src/auto_scheduler/compute_dag.cc                  | 24 +++++------
 src/auto_scheduler/cost_model.cc                   |  2 +-
 src/auto_scheduler/feature.cc                      | 16 ++++----
 src/auto_scheduler/loop_state.cc                   |  2 +-
 src/auto_scheduler/measure.cc                      | 10 ++---
 .../search_policy/sketch_policy_rules.cc           |  8 ++--
 src/auto_scheduler/search_policy/utils.cc          |  6 +--
 src/auto_scheduler/search_policy/utils.h           | 12 +++---
 src/auto_scheduler/transform_step.cc               |  2 +-
 22 files changed, 73 insertions(+), 148 deletions(-)

diff --git a/include/tvm/auto_scheduler/compute_dag.h b/include/tvm/auto_scheduler/compute_dag.h
index 1d65850..553008a 100755
--- a/include/tvm/auto_scheduler/compute_dag.h
+++ b/include/tvm/auto_scheduler/compute_dag.h
@@ -53,11 +53,11 @@ class AccessAnalyzerNode : public Object {
   using OperationMap = std::unordered_map<te::Operation, T, ObjectHash, ObjectEqual>;

   /*! \brief Map an operation to all operations it reads from.
-   * For each operation pair, use a two-dimentional array for multiple multi-dimentional accesses
+   * For each operation pair, use a two-dimensional array for multiple multi-dimensional accesses
   * The inner vector represents the indices of multi-dimensional access.*/
   OperationMap<OperationMap<std::vector<std::vector<PrimExpr>>>> read_from;
   /*! \brief Map an operation to all operations it is read by.
-   * For each operation pair, use a two-dimentional array for multiple multi-dimentional accesses
+   * For each operation pair, use a two-dimensional array for multiple multi-dimensional accesses
   * The inner vector represents the indices of multi-dimensional access.*/
   OperationMap<OperationMap<std::vector<std::vector<PrimExpr>>>> read_by;
   /*! \brief Store the number of common outer iterators for operation pairs that have
@@ -67,7 +67,7 @@
    * (e.g., injective, broadcast and elementwise ops without reduction) */
   OperationMap<bool> is_simple_access;
   /*! \brief Store whether the operation is strictly inlineable
-   * (e.g., injective, broadcast and elementwise without reduction, branch or expenive operations)
+   * (e.g., injective, broadcast and elementwise without reduction, branch or expensive operations)
    */
   OperationMap<bool> is_strictly_inlineable;
   /*! \brief Store whether the operation needs multi-level tiling
@@ -98,8 +98,8 @@ class AccessAnalyzer : public ObjectRef {
   TVM_DLL bool IsSimpleAccess(const te::Operation& op) const;

   /*!
- * \brief Return whether this operation is strictly inlinable - * (e.g., injective, broadcast and elementwise without reduction, branch or expenive operations) + * \brief Return whether this operation is strictly inlineable + * (e.g., injective, broadcast and elementwise without reduction, branch or expensive operations) * \param op The operation */ TVM_DLL bool IsStrictlyInlineable(const te::Operation& op) const; @@ -265,7 +265,7 @@ class ComputeDAG : public ObjectRef { * \brief Since some steps may change the ComputeDAG (e.g. CacheRead/CacheWrite), the initial * ComputeDAG may not be up-to-date. This function replays the given transform steps from the * initial state and returns an up-to-date ComputeDAG. - * \param steps The steps to be replaied. Usually we'll filter out the unused steps to speed up + * \param steps The steps to be replayed. Usually we'll filter out the unused steps to speed up * the replay process, since we only intend to get a ComputeDAG with the up-to-date op stage * structure. * \return The up-to-date ComputeDAG. diff --git a/include/tvm/auto_scheduler/feature.h b/include/tvm/auto_scheduler/feature.h index cce4535..a1782f1 100755 --- a/include/tvm/auto_scheduler/feature.h +++ b/include/tvm/auto_scheduler/feature.h @@ -100,9 +100,9 @@ void GetPerStoreFeaturesFromFile(const std::string& filename, int max_lines, int /*! * \brief Get per-store features from measurement input/result pairs - * \param inputs The meaurement inputs + * \param inputs The measurement inputs * \param results The measurement results - * \param skip_first_n_feature_extraction Skip feature extraction for the first n meaurement pairs + * \param skip_first_n_feature_extraction Skip feature extraction for the first n measurement pairs * \param max_n_bufs The maximum number of extracted buffers for one statement * \param features The returned feature vector. The innermost vector contains the * feature vectors for all BufferStoreNode statements diff --git a/include/tvm/auto_scheduler/loop_state.h b/include/tvm/auto_scheduler/loop_state.h index ba58f37..caff37c 100755 --- a/include/tvm/auto_scheduler/loop_state.h +++ b/include/tvm/auto_scheduler/loop_state.h @@ -458,7 +458,7 @@ namespace std { /*! * \brief The equal_to function for auto_scheduler::State. - * This function checkes the equality by looking at the lowered string format of states. + * This function checks the equality by looking at the lowered string format of states. * If two states with different transform history have the same lowered string format, * they will be considered being equal. */ diff --git a/include/tvm/auto_scheduler/measure.h b/include/tvm/auto_scheduler/measure.h index 8cfc740..349f4f8 100755 --- a/include/tvm/auto_scheduler/measure.h +++ b/include/tvm/auto_scheduler/measure.h @@ -71,7 +71,7 @@ enum class MeasureErrorNO : int { /*! \brief Timeout during run. */ kRunTimeoutError = 7, /*! \brief Unknown error. */ - kUnknonwError = 8, + kUnknownError = 8, }; // Inputs and results of one measurement @@ -104,7 +104,7 @@ class MeasureInput : public ObjectRef { public: /*! * \brief The constructor. - * \param task The SearchTeask of this measure. + * \param task The SearchTask of this measure. * \param state The State to be measured. */ MeasureInput(SearchTask task, State state); @@ -353,7 +353,7 @@ class LocalRunner : public ProgramRunner { public: /*! * \brief The constructor. See the corresponding class in python/tvm/auto_scheduler/measure.py - * for more detailed parameter explaination. 
+   * for more detailed parameter explanation.
   * \param timeout The timeout limit (in second) for each run.
   * This is used in a wrapper of the multiprocessing.Process.join().
   * \param number The number of times to run the generated code for taking average.
@@ -401,7 +401,7 @@ class RPCRunner : public ProgramRunner {
 public:
  /*!
   * \brief The constructor. See the corresponding class in python/tvm/auto_scheduler/measure.py
-   * for more detailed parameter explaination.
+   * for more detailed parameter explanation.
   * \param key The key of the device registered in the RPC tracker.
   * \param host The host address of the RPC Tracker.
   * \param port The port of RPC Tracker.
@@ -445,7 +445,7 @@ class ProgramMeasurerNode : public Object {
  /*! \brief Verbosity level. 0 for silent, 1 to output information during program measuring. */
  int verbose;
  /*! \brief The number of max continuous error. */
-  int max_continous_error;
+  int max_continuous_error;

  /*! \brief Reset book keeping variables */
  void Reset();
@@ -472,7 +472,7 @@ class ProgramMeasurerNode : public Object {
               Array<MeasureResult>* results);

  /*! \brief The default max continuous error setting. */
-  static const int DEFAULT_MAX_CONTINOUS_ERROR = 150;
+  static const int DEFAULT_MAX_CONTINUOUS_ERROR = 150;

  static constexpr const char* _type_key = "auto_scheduler.ProgramMeasurer";
  TVM_DECLARE_FINAL_OBJECT_INFO(ProgramMeasurerNode, Object);
@@ -491,11 +491,11 @@ class ProgramMeasurer : public ObjectRef {
   * \param callbacks MeasureCallback to be called after each measure batch.
   * \param verbose Verbosity level. 0 for silent, 1 to output information during program
   * measuring.
-   * \param max_continous_error The number of allowed maximum continuous error.
+   * \param max_continuous_error The number of allowed maximum continuous error.
   */
  ProgramMeasurer(ProgramBuilder builder, ProgramRunner runner,
                  Optional<Array<MeasureCallback>> callbacks, int verbose,
-                  int max_continous_error = -1);
+                  int max_continuous_error = -1);

  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(ProgramMeasurer, ObjectRef, ProgramMeasurerNode);
};
diff --git a/python/tvm/auto_scheduler/cost_model/cost_model.py b/python/tvm/auto_scheduler/cost_model/cost_model.py
index 17370d6..83c8463 100644
--- a/python/tvm/auto_scheduler/cost_model/cost_model.py
+++ b/python/tvm/auto_scheduler/cost_model/cost_model.py
@@ -55,7 +55,7 @@ class RandomModel(CostModel):
         ----------
         search_task : SearchTask
             The search task of states
-        statse : List[State]
+        states : List[State]
             The input states

         Returns
@@ -126,7 +126,7 @@ class PythonBasedModel(CostModel):
         ----------
         search_task : SearchTask
             The search task of states
-        statse : List[State]
+        states : List[State]
             The input states

         Returns
@@ -143,7 +143,7 @@ class PythonBasedModel(CostModel):
         ----------
         search_task : SearchTask
             The search task of states
-        statse : List[State]
+        states : List[State]
             The input states

         Returns
@@ -166,7 +166,7 @@ class PythonBasedModel(CostModel):
             ...
             int n_stage_i;       // the number of stages in states[i]
             float stage_scores_1[n_stage_i]; // the scores for all stages in states[i]
-            ...  // untill i == N - 1
+            ...  // until i == N - 1
         }
         To implement this format, we also store int as float, so we can store all numbers
         into a single float array.
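The packed score format above is easier to see in code. A minimal sketch of the unpacking the docstring implies (the function name and test values are illustrative, not TVM's API):

    import numpy as np

    def unpack_stage_scores(packed, n_states):
        # Layout per the docstring: scores[N], then for each state i an
        # n_stage_i count followed by that many per-stage scores.
        state_scores = packed[:n_states]
        stage_scores, pos = [], n_states
        for _ in range(n_states):
            n_stage = int(packed[pos])  # ints are stored as floats
            pos += 1
            stage_scores.append(packed[pos:pos + n_stage])
            pos += n_stage
        return state_scores, stage_scores

    # Two states: the first has 2 stages, the second has 1.
    packed = np.array([0.7, 0.3, 2.0, 0.4, 0.3, 1.0, 0.3], dtype=np.float32)
    print(unpack_stage_scores(packed, n_states=2))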
diff --git a/python/tvm/auto_scheduler/cost_model/xgb_model.py b/python/tvm/auto_scheduler/cost_model/xgb_model.py index 8704f2a..1519368 100644 --- a/python/tvm/auto_scheduler/cost_model/xgb_model.py +++ b/python/tvm/auto_scheduler/cost_model/xgb_model.py @@ -44,7 +44,6 @@ class XGBDMatrixContext: def get(self, key, matrix, default=None): """ Get an attribute of a xgb.DMatrix - Parameters ---------- key: str @@ -59,7 +58,6 @@ class XGBDMatrixContext: def set(self, key, matrix, value): """ Set an attribute for a xgb.DMatrix - Parameters ---------- key: str @@ -77,15 +75,12 @@ dmatrix_context = XGBDMatrixContext() class XGBModel(PythonBasedModel): """Train a XGBoost model to predict the normalized throughputs of programs. - Let the normalized throughput be the score of a program (higher is better). We predict the (approximiate) score of a program = the sum of the scores of all stages in this program. i.e. score(P) = score_s0 + score_s1 + ... + score_sn, where score_si is the score of Stage i in Program P. - We extract feature for each stage and let the xgboost predict the score for each stage. We then sum up the predictions as the score of the whole program. - We use RMSE as the loss function. i.e. loss(P, y) = 1/2 * (score(P) - y)^2, where P is the program and y is the normalized throughput according to the ground truth (measurement). @@ -123,7 +118,6 @@ class XGBModel(PythonBasedModel): def update(self, inputs, results): """Update the cost model according to new measurement results (training data). XGBoost does not support incremental training, so we re-train a new model every time. - Parameters ---------- inputs : List[MeasureInput] @@ -175,14 +169,12 @@ class XGBModel(PythonBasedModel): def predict(self, task, states): """Predict the scores of states - Parameters ---------- search_task : SearchTask The search task of states statse : List[State] The input states - Returns ------- scores: List[float] @@ -205,24 +197,20 @@ class XGBModel(PythonBasedModel): def predict_stages(self, task, states): """Predict the scores of all stages in states. This is the breakdown version of `predict`. - Parameters ---------- search_task : SearchTask The search task of states statse : List[State] The input states - Returns ------- scores: List[float] The predicted scores for all stages in all states in the packed format - Note ---- For faster data copy between c++ and python, the python part returns scores in a single flatten array using a packed format. The c++ part then unpacks the flatten array. - The packed format is: { float scores[N]; // scores[i] is the score for states[i]. @@ -269,7 +257,6 @@ class XGBModel(PythonBasedModel): def update_from_file(self, file_name, n_lines=None): """Load measure records from a log file to update the cost model. This function can be used to pre-train the cost model with history log files. 
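The pack-sum scheme in the XGBModel docstring reduces to two operations: per-program summation of stage scores, and a shared residual for the gradient. A standalone sketch (NumPy only; the array values are made up):

    import numpy as np

    # raw_preds[j] is the predicted score of one stage; pack_ids[j] maps that
    # stage to its program, so score(P) = sum of the scores of P's stages.
    raw_preds = np.array([0.2, 0.5, 0.1, 0.9])
    pack_ids = np.array([0, 0, 1, 1])
    program_scores = np.bincount(pack_ids, weights=raw_preds)  # [0.7, 1.0]

    # For loss(P, y) = 1/2 * (score(P) - y)^2, every stage in program i shares
    # the same gradient (score_i - y_i), which is what a pack-sum custom
    # objective hands back to XGBoost; the hessian is constant.
    y = np.array([1.0, 0.8])
    gradient = (program_scores - y)[pack_ids]
    hessian = np.ones_like(gradient)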
- Parameters ---------- file_name: str @@ -283,7 +270,6 @@ class XGBModel(PythonBasedModel): def save(self, file_name: str): """Save the model to a file - Parameters ---------- file_name: str @@ -293,7 +279,6 @@ class XGBModel(PythonBasedModel): def load(self, file_name: str): """Load the model from a file - Parameters ---------- file_name: str @@ -307,12 +292,10 @@ class XGBModel(PythonBasedModel): def feature_to_pack_sum_xgbmatrix(xs): """Convert an extracted multi-stage feature vector to a xgbmatrx in pack-sum format - Parameters ---------- xs: np.ndarray The feature vector - Returns ------- dmatrix: xgb.DMatrix @@ -333,7 +316,6 @@ def feature_to_pack_sum_xgbmatrix(xs): def pack_sum_xgbmatrix(xs, ys, gids=None, weights=None): """Convert (feature, label) pairs into a xgb matrix with pack-sum format - Parameters ---------- xs: np.ndarray @@ -344,7 +326,6 @@ def pack_sum_xgbmatrix(xs, ys, gids=None, weights=None): Group id (task id) weights: Optional[np.ndarray] The weight of samples - Returns ------- dmatrix: xgb.DMatrix @@ -390,14 +371,12 @@ def pack_sum_xgbmatrix(xs, ys, gids=None, weights=None): def predict_throughput_pack_sum(raw_preds, pack_ids): """Predict the throughputs for predictions in pack-sum format - Parameters ---------- raw_preds: np.ndarray The raw predictions pack_ids: List[int] The pack id for predictions - Returns ------- throughputs: np.ndarray @@ -410,14 +389,12 @@ def predict_throughput_pack_sum(raw_preds, pack_ids): def pack_sum_square_error(preds, dtrain): """Implement square error loss on pack-sum format as a custom objective function for xgboost. - Parameters ---------- preds: np.ndarray The predicitons dtrain: xgb.DMatrix The training set - Returns ------- gradient: np.ndarray @@ -441,14 +418,12 @@ def pack_sum_square_error(preds, dtrain): def pack_sum_rmse(raw_preds, labels): """Evaluate RMSE (rooted mean square error) in the pack-sum format - Parameters ---------- raw_preds: np.ndarray The raw prediction labels: xgb.DMatrix The groud-truth label matrix - Returns ------- name: str @@ -462,12 +437,10 @@ def pack_sum_rmse(raw_preds, labels): def pack_sum_average_peak_score(N): """Return the evaluation function for average-peak-score@N - Parameters ---------- N: int The "N" in "average-peak-score@N" - Returns ------- The evaluation function @@ -475,14 +448,12 @@ def pack_sum_average_peak_score(N): def feval(preds, labels): """Evaluate average-peak-score@N in the pack-sum format - Parameters ---------- raw_preds: np.ndarray The raw prediction labels: xgb.DMatrix The groud-truth label matrix - Returns ------- name: str diff --git a/python/tvm/auto_scheduler/feature.py b/python/tvm/auto_scheduler/feature.py index ef42dc6..4c1883a 100644 --- a/python/tvm/auto_scheduler/feature.py +++ b/python/tvm/auto_scheduler/feature.py @@ -79,7 +79,7 @@ def unpack_feature(byte_arr: bytearray) -> Tuple[np.ndarray, np.ndarray, np.ndar float features_i[size[i]]; // The features for record i ... // until i == n - 1 - float throuputs[sizes[n]]; // The normalized throughputs for n records + float throughputs[sizes[n]]; // The normalized throughputs for n records int task_ids[size[n+1]; // The task ids for n records } diff --git a/python/tvm/auto_scheduler/loop_state.py b/python/tvm/auto_scheduler/loop_state.py index 897a682..7cfe6cc 100644 --- a/python/tvm/auto_scheduler/loop_state.py +++ b/python/tvm/auto_scheduler/loop_state.py @@ -18,7 +18,6 @@ """ The definition of the "state" in the search. - Each LoopState corresponds to a schedule for its ComputeDAG. A LoopState consists of: 1. 
a current loop structure; 2. a list of transformation steps used to construct the loop structure. @@ -29,7 +28,6 @@ During the schedule search process, the loop structure can provide search policy information on how to manipulate the current state. The transform history is a sequence of `TransformStep` which will finally be mapped to TVM schedule primitives. The steps are also used for the serialization of a state. - The LoopState can be seen as a lightweight loop structure IR specifically for schedule search. We don't use the existing TVM IR but to extend a new structure on it is because: 1. We want fast incremental change to the loop structures. The search policy needs to get the @@ -37,7 +35,6 @@ immediate loop structures update rather than after TVM lowering; 2. We want serializable transform history for replay, backtracking, and mutation; 3. We may create some macro schedule primitives that represent the combination of several TVM schedule primitives. - When the search is finished, we will lower the state to TVM IR with TVM's schedule primitives. Since we share a lot of common objects during search, the transformation is implemented in copy on write style. All objects are immutable, which is similar to TVM IR. @@ -75,16 +72,13 @@ class State: """ A state in the search process. It consists of the current loop structure and a list of transformation steps used to construct it. - Each State corresponds to a specific schedule for its ComputeDAG. - Parameters ---------- state_object : StateObject The StateObject corresponding to C++ internal State object. dag : ComputeDAG The original ComputeDAG of this State. - Notes ----- This is a wrapper class of StateObject to deal with copy-on-write property @@ -144,7 +138,6 @@ class State: def bind(self, stage, iterator, thread_name): """Schedule primitive corresponding to `te.Stage.bind`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -161,7 +154,6 @@ class State: - threadIdx.y - blockIdx.z - threadIdx.z - Returns ------- res_it : Iterator @@ -181,7 +173,6 @@ class State: def parallel(self, stage, iterator): """Schedule primitive corresponding to `te.Stage.parallel`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -189,7 +180,6 @@ class State: or output tensor of the stage. iterator : Iterator The iterator to be paralleled. - Returns ------- res_it : Iterator @@ -203,7 +193,6 @@ class State: def unroll(self, stage, iterator, max_unroll=None): """Schedule primitive corresponding to `te.Stage.unroll`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -213,7 +202,6 @@ class State: The iterator to be unrolled. max_unroll : Optional[int] The max unroll limit. Iterator with extent larger than this limit will be skipped. - Returns ------- res_it : Iterator @@ -230,7 +218,6 @@ class State: def vectorize(self, stage, iterator): """Schedule primitive corresponding to `te.Stage.vectorize`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -238,7 +225,6 @@ class State: or output tensor of the stage. iterator : Iterator The iterator to be vectorized. - Returns ------- res_it : Iterator @@ -252,7 +238,6 @@ class State: def fuse(self, stage, iters): """Schedule primitive corresponding to `te.Stage.fuse`. See also the `te.Stage` for more details. 
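Taken together, the primitives documented here let a search policy build a schedule incrementally on a State. A minimal sketch, assuming a TVM build with auto_scheduler available (the matmul workload and all names are illustrative):

    import tvm
    from tvm import te, auto_scheduler

    N = 512
    A = te.placeholder((N, N), name="A")
    B = te.placeholder((N, N), name="B")
    k = te.reduce_axis((0, N), name="k")
    C = te.compute((N, N), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")

    dag = auto_scheduler.ComputeDAG([A, B, C])
    state = dag.get_init_state()

    i, j, kk = state[C].iters
    io, ii = state.split(C, i, [8])        # each call appends a TransformStep
    jo, ji = state.split(C, j, [8])
    state.reorder(C, [io, jo, kk, ii, ji])
    fused = state.fuse(C, [io, jo])
    state.parallel(C, fused)
    print(state)                           # the loop structure updates immediately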
- Parameters ---------- stage : Union[int, Operation, Tensor] @@ -260,12 +245,10 @@ class State: or output tensor of the stage. iters : List[Iterator] The iterators to be fused. - Returns ------- res_it : Iterator The fused Iterator. - Notes ----- If the iterators to be fused have stages attached at them(by compute_at), the fused @@ -279,7 +262,6 @@ class State: def pragma(self, stage, iterator, pragma_type): """Schedule primitive corresponding to `te.Stage.pragma`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -297,7 +279,6 @@ class State: def reorder(self, stage, order): """Schedule primitive corresponding to `te.Stage.reorder`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -313,10 +294,8 @@ class State: def split(self, stage, iterator, lengths, inner_to_outer=True): """Schedule primitive corresponding to `te.Stage.split`. See also the `te.Stage` for more details. - This API supports multiple split factors. (e.g. with 2 split factors, the original iterator will be split to 3 parts, use `inner_to_outer` to control the split order) - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -328,12 +307,10 @@ class State: The multiple split factors. Can be None to be filled by search policy. inner_to_outer: boolean = True Whether the factor go from inner to outer, or from outer to inner. - Returns ------- res_its : List[Iterator] The splitted new Iterators. - Notes ----- If we do split on an iterator which has stages attached at it(by compute_at), the inner @@ -346,9 +323,7 @@ class State: def follow_split(self, stage, iterator, src_step_id, n_split): """The schedule primitive similar to split, but uses split factors from previous steps. - This step splits the iterator by the same factors as the given SplitStep. - Notes ------ This step is useful in a scenario that we have subgraph Dense -> Relu, @@ -356,7 +331,6 @@ class State: the same tiling structure of common outer loops. The follow_split step could be used here to split the Dense stage and makes sure its splitting factors are the same as the given split step for the ReLU stage. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -368,7 +342,6 @@ class State: The index of the split step to be followed in the history. n_split : int The number of split level. - Returns ------- res_its : List[Iterator] @@ -382,10 +355,8 @@ class State: def follow_fused_split(self, stage, iterator, src_step_ids, level, factor_or_nparts): """Schedule primitive extends to split step. - This step is used to split an iterator by the same factors as the given list of SplitSteps and FuseSteps. - Notes ------ This step is useful in a scenario that we have a subgraph @@ -400,7 +371,6 @@ class State: axis is bound to an iterator generated by split & fuse step. The follow_fused_step is used split the iterator to 2 parts, while the split factor matches the final extent of the threadIdx.x bound iterator. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -415,7 +385,6 @@ class State: factor_or_nparts : bool True to use `factor` for split from inner to outer, False to use `nparts` for split from outer to inner. - Returns ------- res_its : List[Iterator] @@ -435,7 +404,6 @@ class State: def storage_align(self, stage, iterator, factor, offset): """Schedule primitive corresponding to `te.Stage.storage_align`. See also the `te.Stage` for more details. 
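follow_split is easiest to see with the Dense -> ReLU scenario from its docstring. A hedged continuation of the matmul sketch above (the ReLU stage and the split factor are invented for illustration):

    relu = te.compute((N, N), lambda x, y: te.max(C[x, y], 0.0), name="relu")
    dag2 = auto_scheduler.ComputeDAG([A, B, relu])
    s = dag2.get_init_state()

    # Split the ReLU stage first; that SplitStep is step 0 of the fresh history.
    ro, ri = s.split(relu, s[relu].iters[0], [16])
    # Now make the matmul stage reuse the same factor, so both stages share
    # one outer tiling structure.
    co, ci = s.follow_split(C, s[C].iters[0], src_step_id=0, n_split=1)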
- Parameters ---------- stage : Union[int, Operation, Tensor] @@ -455,7 +423,6 @@ class State: def compute_at(self, stage, target_stage, target_iter): """Schedule primitive corresponding to `te.Stage.compute_at`. See also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -466,7 +433,6 @@ class State: or output tensor of the stage. target_iter : Iterator The target Iterator of compute_at. - Notes ----- After compute_at, we need careful dependency analysis to compute the accurate bound @@ -484,7 +450,6 @@ class State: def compute_inline(self, stage): """Schedule primitive corresponding to `te.Stage.compute_inline`, see also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -498,13 +463,11 @@ class State: def compute_root(self, stage): """Schedule primitive corresponding to `te.Stage.compute_root`. Ssee also the `te.Stage` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] The Stage to be marked compute at root, which can be specified by the integer index, Operation, or output tensor of the stage. - Notes ----- After compute_root, we need careful dependency analysis to compute the accurate bound @@ -519,7 +482,6 @@ class State: def cache_read(self, stage, scope_name, reader_stages): """Schedule primitive corresponding to `te.Schedule.cache_read`. See also the `te.Schedule` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -530,12 +492,10 @@ class State: reader_stages : List[Union[int, Operation, Tensor]] The reader stages. Each of the list can be specified by the integer index, Operation, or output tensor of the stage. - Returns ------- new_stage_op : Operator The Operator of the new added stage. - Notes ----- Cache read step will insert an extra stage to the original ComputeDAG (at the back of the @@ -558,7 +518,6 @@ class State: def cache_write(self, stage, scope_name): """Schedule primitive corresponding to `te.Schedule.cache_write`. See also the `te.Schedule` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -566,12 +525,10 @@ class State: or output tensor of the stage. scope_name : str The scope name of the newly added compute stage. - Returns ------- new_stage_op : Operator The Operator of the new added stage. - Notes ----- Cache write step will insert an extra stage to the original ComputeDAG (in the front of the @@ -590,7 +547,6 @@ class State: def rfactor(self, stage, iterator, factor_iter_id): """Schedule primitive corresponding to `te.Schedule.rfactor`. See also the `te.Schedule` for more details. - Parameters ---------- stage : Union[int, Operation, Tensor] @@ -600,12 +556,10 @@ class State: The reduction iterator to be factored. factor_iter_id : int The position where the new iterator is placed. - Returns ------- new_stage_op : Operator The Operator of the new added stage. - Notes ----- Rfactor step will insert an extra stage to the original ComputeDAG (in the front of the diff --git a/python/tvm/auto_scheduler/measure.py b/python/tvm/auto_scheduler/measure.py index fd05b2d..c57b39b 100644 --- a/python/tvm/auto_scheduler/measure.py +++ b/python/tvm/auto_scheduler/measure.py @@ -60,7 +60,7 @@ from .utils import ( # The maximum length of error message MAX_ERROR_MSG_LEN = 512 -# We use fork and a global variable to copy arguments between processings. +# We use fork and a global variable to copy arguments between processes. 
# This can avoid expensive serialization of TVM IR when using multiprocessing.Pool GLOBAL_BUILD_ARGUMENTS = None GLOBAL_RUN_ARGUMENTS = None @@ -475,7 +475,7 @@ def local_build_worker(index): """ global GLOBAL_BUILD_ARGUMENTS - # We use fork and a global variable to copy arguments between processings. + # We use fork and a global variable to copy arguments between processes. # This can avoid expensive serialization of TVM IR when using multiprocessing.Pool if not GLOBAL_BUILD_ARGUMENTS: raise ValueError("GLOBAL_BUILD_ARGUMENTS not found") @@ -563,7 +563,7 @@ def local_builder_build(inputs, timeout, n_parallel, build_func="default", verbo res : List[BuildResult] The build results of these MeasureInputs. """ - # We use fork and a global variable to copy arguments between processings. + # We use fork and a global variable to copy arguments between processes. # This can avoid expensive serialization of TVM IR when using multiprocessing.Pool global GLOBAL_BUILD_ARGUMENTS diff --git a/python/tvm/auto_scheduler/measure_record.py b/python/tvm/auto_scheduler/measure_record.py index b9633d5..0185d04 100644 --- a/python/tvm/auto_scheduler/measure_record.py +++ b/python/tvm/auto_scheduler/measure_record.py @@ -126,10 +126,10 @@ def load_best(filename, workload_key=None, target=None): File name to load log from. workload_key : Optional[str] The workload key of the compute declaration. - With `None`, this retuns the best measure pair of all workloads. + With `None`, this returns the best measure pair of all workloads. target : Optional[tvm.target.Target] The target device. - With `None`, this retuns the best measure pair of all target devices. + With `None`, this returns the best measure pair of all target devices. Returns ------- diff --git a/python/tvm/auto_scheduler/search_policy.py b/python/tvm/auto_scheduler/search_policy.py index 15d84dc..a9d3236 100644 --- a/python/tvm/auto_scheduler/search_policy.py +++ b/python/tvm/auto_scheduler/search_policy.py @@ -192,10 +192,10 @@ class SketchPolicy(SearchPolicy): states = _ffi_api.SketchPolicySampleInitialPopulation(self, pop_size) return states - def evolutionary_search(self, init_populuations, out_size): + def evolutionary_search(self, init_populations, out_size): """Evolutionary search. This python interface is mainly used for debugging and testing. - The actual search is all doen in c++. + The actual search is all done in c++. Parameters ---------- init_populations: List[State] @@ -207,5 +207,5 @@ class SketchPolicy(SearchPolicy): states: List[State] The generated states """ - states = _ffi_api.SketchPolicyEvolutionarySearch(self, init_populuations, out_size) + states = _ffi_api.SketchPolicyEvolutionarySearch(self, init_populations, out_size) return states diff --git a/python/tvm/auto_scheduler/utils.py b/python/tvm/auto_scheduler/utils.py index bbc2d77..ff357c4 100644 --- a/python/tvm/auto_scheduler/utils.py +++ b/python/tvm/auto_scheduler/utils.py @@ -143,7 +143,7 @@ class NoDaemonContext(type(multiprocessing.get_context())): class NoDaemonPool(multiprocessing.pool.Pool): """A no daemon pool version of multiprocessing.Pool. 
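The fork-plus-global-variable trick that measure.py's comments describe, reduced to a standalone sketch (hypothetical data; the real module stores MeasureInputs in GLOBAL_BUILD_ARGUMENTS):

    import multiprocessing

    GLOBAL_BUILD_ARGUMENTS = None  # set by the parent before the pool forks

    def worker(index):
        # A forked child inherits the parent's memory, so the heavy arguments
        # are already visible here; only the small index crossed the pipe.
        inputs, timeout = GLOBAL_BUILD_ARGUMENTS
        return inputs[index].upper(), timeout

    if __name__ == "__main__":
        GLOBAL_BUILD_ARGUMENTS = (["mod_a", "mod_b"], 10)
        ctx = multiprocessing.get_context("fork")  # the trick requires fork
        with ctx.Pool(2) as pool:
            print(pool.map(worker, range(2)))  # [('MOD_A', 10), ('MOD_B', 10)]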
-    This allows us to start new processings inside the worker function"""
+    This allows us to start new processes inside the worker function"""

    def __init__(self, *args, **kwargs):
        kwargs["context"] = NoDaemonContext()
diff --git a/python/tvm/auto_scheduler/workload_registry.py b/python/tvm/auto_scheduler/workload_registry.py
index 6c3b4d1..f0c8398 100644
--- a/python/tvm/auto_scheduler/workload_registry.py
+++ b/python/tvm/auto_scheduler/workload_registry.py
@@ -124,7 +124,7 @@ def make_workload_key(func, args):


 def decode_workload_key_to_func_args(workload_key):
-    """Decode a workload key to the registerd function name and its corresponding args.
+    """Decode a workload key to the registered function name and its corresponding args.

     Parameters
     ----------
diff --git a/src/auto_scheduler/compute_dag.cc b/src/auto_scheduler/compute_dag.cc
index 7ebed21..7c9ce4c 100755
--- a/src/auto_scheduler/compute_dag.cc
+++ b/src/auto_scheduler/compute_dag.cc
@@ -153,7 +153,7 @@ class ReadAccessExtractor : public StmtExprVisitor {
  }

  // All read accesses to all operations
-  // The innermost vector stores mulit-dimentional indices.
+  // The innermost vector stores multi-dimensional indices.
  // The middle vector stores possible multiple accesses
  OperationMap<std::vector<std::vector<PrimExpr>>> read_access;
  // Whether this expression has branch
@@ -683,8 +683,8 @@ class IndexRewriter : public StmtExprMutator {
    std::unordered_map<std::string, PrimExpr> name_to_arg;
    for (const auto& arg : op->indices) {
      std::string axis_name;
-      if (const auto* pimm = arg.as<IntImmNode>()) {
-        CHECK_EQ(pimm->value, 0);
+      if (const auto* int_imm = arg.as<IntImmNode>()) {
+        CHECK_EQ(int_imm->value, 0);
        axis_name = "IntImm";
      } else {
        axis_name = AxisBaseName(CleanName(Downcast<Var>(arg)->name_hint));
@@ -741,8 +741,8 @@ std::string GetOrigLayout(std::set<std::string>* placeholder_axis_names, const t
  for (const auto& ev : extractor.read_access[placeholder_op]) {
    for (const auto& e : ev) {
      std::string axis_name;
-      if (const auto* pimm = e.as<IntImmNode>()) {
-        CHECK_EQ(pimm->value, 0);
+      if (const auto* int_imm = e.as<IntImmNode>()) {
+        CHECK_EQ(int_imm->value, 0);
        axis_name = "IntImm";
      } else {
        axis_name = AxisBaseName(CleanName(Downcast<Var>(e)->name_hint));
@@ -828,7 +828,7 @@ std::string GetNewLayout(Array<PrimExpr>* new_shape, const State& state, const i
 }

 void ComputeDAG::RewriteLayout(const Array<Step>& transform_steps) {
-  ComputeDAGNode* pdag = this->CopyOnWrite();
+  ComputeDAGNode* p_dag = this->CopyOnWrite();
  auto node = make_object<StateNode>();
  node->transform_steps = transform_steps;
  node->concrete = true;
@@ -877,8 +877,8 @@ void ComputeDAG::RewriteLayout(const Array<Step>& transform_steps) {

      handled_ops.insert(placeholder_op);

-      Array<te::Operation> old_ops = pdag->ops;
-      ArrayNode* pops = pdag->ops.CopyOnWrite();
+      Array<te::Operation> old_ops = p_dag->ops;
+      ArrayNode* pops = p_dag->ops.CopyOnWrite();

      // Create new placeholder
      te::Operation new_placeholder_op;
@@ -949,10 +949,10 @@ void ComputeDAG::RewriteLayout(const Array<Step>& transform_steps) {
    }
  }

-  pdag->init_state = State(pdag->ops);
+  p_dag->init_state = State(p_dag->ops);

-  Array<te::Tensor> old_tensors = pdag->tensors;
-  ArrayNode* ptensors = pdag->tensors.CopyOnWrite();
+  Array<te::Tensor> old_tensors = p_dag->tensors;
+  ArrayNode* p_tensors = p_dag->tensors.CopyOnWrite();

  for (size_t i = 0; i < old_tensors.size(); ++i) {
    const auto& old_tensor = old_tensors[i];
@@ -964,7 +964,7 @@ void ComputeDAG::RewriteLayout(const Array<Step>& transform_steps) {
    }
    if (new_op.defined()) {
      auto index = old_tensor->value_index;
-      ptensors->SetItem(i, new_op.output(index));
+      p_tensors->SetItem(i, new_op.output(index));
    }
  }  // end for placeholder
diff --git a/src/auto_scheduler/cost_model.cc b/src/auto_scheduler/cost_model.cc
index 456e2ef..3d540c7 100755
--- a/src/auto_scheduler/cost_model.cc
+++ b/src/auto_scheduler/cost_model.cc
@@ -91,7 +91,7 @@ void PythonBasedModelNode::PredictStages(const SearchTask& task, const Array<St
diff --git a/src/auto_scheduler/feature.cc b/src/auto_scheduler/feature.cc
--- a/src/auto_scheduler/feature.cc
+++ b/src/auto_scheduler/feature.cc
  std::vector<std::vector<PrimExpr>> indices;
};
@@ -131,7 +131,7 @@ struct FeatureSet {
  float vec_num;      // The number of vectorized iterators
  float vec_prod;     // The product of the lengths of vectorized iterators
  float vec_len;      // The length of the innermost vectorized iterator
-  AnnotationPosType vec_type;  // The type of vectorizatoin position
+  AnnotationPosType vec_type;  // The type of vectorization position
  float unroll_num;   // The number of unrolled iterators
  float unroll_prod;  // The product of the lengths of vectorized iterators
  float unroll_len;   // The length of the innermost unrolled iterator
@@ -158,12 +158,12 @@ struct FeatureSet {

  // Group 4: Allocation related features
  float alloc_size;        // The size of allocated buffer in bytes
-  float alloc_outer_prod;  // The product of lenghts of loops outside the scope of the allocation
-  float alloc_inner_prod;  // The product of lenghts of loops inside the score of the allocation
+  float alloc_outer_prod;  // The product of lengths of loops outside the scope of the allocation
+  float alloc_inner_prod;  // The product of lengths of loops inside the score of the allocation
  float alloc_prod;        // alloc_outer_prod * alloc_inner_prod

  // Group 5: Outer scope related features
-  float outer_prod;            // The product of lenghts of outer loops
+  float outer_prod;            // The product of lengths of outer loops
  float num_loops;             // The number of outer loops
  float auto_unroll_max_step;  // The value of pragma "auto_unroll_max_step"
};
@@ -221,7 +221,7 @@ AnnotationPosType GetAnnotationPosEncoding(const Var& var, const Array<PrimExpr
    }
  } else {
    // If the axis is not found in both spatial args and reduce axis,
-    // then this stage must compute_at somewhere under this aixs and this axis is simplified out
+    // then this stage must compute_at somewhere under this axis and this axis is simplified out
    // We assume it is an outer spatial
    return AnnotationPosType::kPosOuterSpatial;
  }
@@ -1516,7 +1516,7 @@ void GetPerStoreFeaturesFromMeasurePairs(const Array<MeasureInput>& inputs,
 *   float features_i[size[i]];  // The features for record i
 *   ...                         // until i == n - 1
 *
- *   float throuputs[sizes[n]];  // The normalized throughputs for n records
+ *   float throughputs[sizes[n]];  // The normalized throughputs for n records
 *   int   task_ids[size[n+1];   // The task ids for n records
 *
 * }
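The byte layout documented in this comment (and in feature.py's unpack_feature) can be decoded with plain offsets. A sketch under the stated layout, assuming ints and floats are both 4 bytes wide (the function name is illustrative):

    import numpy as np

    def decode_features(byte_arr):
        ints = np.frombuffer(byte_arr, dtype=np.int32)
        n = ints[0]
        sizes = ints[1:n + 3]                      # int sizes[n+2]
        body = np.frombuffer(byte_arr, dtype=np.float32, offset=4 * (n + 3))
        features, pos = [], 0
        for s in sizes[:n]:                        # n per-record feature blocks
            features.append(body[pos:pos + s])
            pos += s
        throughputs = body[pos:pos + sizes[n]]     # normalized throughputs
        pos += sizes[n]
        task_ids = body[pos:pos + sizes[n + 1]].view(np.int32)
        return features, throughputs, task_ids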
diff --git a/src/auto_scheduler/loop_state.cc b/src/auto_scheduler/loop_state.cc
index e84dd69..c3c764f 100755
--- a/src/auto_scheduler/loop_state.cc
+++ b/src/auto_scheduler/loop_state.cc
@@ -148,7 +148,7 @@ void AttachMap::DeleteStageEntry(AttachMapNode* pnode, int stage_id) {
  // We get <stage_id, IterKey> from this map
  if (old_entry != pnode->stage_to_attach_iter.end()) {
    // Delete the stage in `iter_to_attached_stages`, if the corresponding iterator does not have
-    // any attatched stage, delete this iterm too
+    // any attached stage, delete this item too
    auto entry2 = pnode->iter_to_attached_stages.find(old_entry->second);
    // We get <IterKey, std::vector<StageKey>> from this map
    FindAndDeleteItem(&entry2->second, stage_id);
diff --git a/src/auto_scheduler/measure.cc b/src/auto_scheduler/measure.cc
index 5642126..70ea7ab 100755
--- a/src/auto_scheduler/measure.cc
+++ b/src/auto_scheduler/measure.cc
@@ -185,15 +185,15 @@ Array<MeasureResult> RPCRunnerNode::Run(const Array<MeasureInput>& inputs,

/********** ProgramMeasurer **********/
ProgramMeasurer::ProgramMeasurer(ProgramBuilder builder, ProgramRunner runner,
                                 Optional<Array<MeasureCallback>> callbacks, int verbose,
-                                 int max_continous_error) {
+                                 int max_continuous_error) {
  auto node = make_object<ProgramMeasurerNode>();
  node->builder = std::move(builder);
  node->runner = std::move(runner);
  node->callbacks = std::move(callbacks);
  node->verbose = verbose;
-  node->max_continous_error = max_continous_error < 0
-                                  ? ProgramMeasurerNode::DEFAULT_MAX_CONTINOUS_ERROR
-                                  : max_continous_error;
+  node->max_continuous_error = max_continuous_error < 0
+                                   ? ProgramMeasurerNode::DEFAULT_MAX_CONTINUOUS_ERROR
+                                   : max_continuous_error;
  data_ = std::move(node);
}

@@ -264,7 +264,7 @@ void ProgramMeasurerNode::Measure(const SearchTask& task, const SearchPolicy& po
      results->push_back(res);
    }

-    if (error_ct > max_continous_error) {
+    if (error_ct > max_continuous_error) {
      LOG(FATAL) << "Too many errors happened during tuning";
    }
  }
diff --git a/src/auto_scheduler/search_policy/sketch_policy_rules.cc b/src/auto_scheduler/search_policy/sketch_policy_rules.cc
index 843301c..7e7b447 100644
--- a/src/auto_scheduler/search_policy/sketch_policy_rules.cc
+++ b/src/auto_scheduler/search_policy/sketch_policy_rules.cc
@@ -325,7 +325,7 @@ SketchGenerationRule::ConditionKind RuleCrossThreadReduction::MeetCondition(
  // Compute the product of lengths of all space iters and all reduce iters
  int cum_space_len, cum_reduce_len;
  std::tie(cum_space_len, cum_reduce_len) =
-      GetCumulativeSpaceAndReductionLengh(state->stages[stage_id]);
+      GetCumulativeSpaceAndReductionLength(state->stages[stage_id]);

  if (NeedsMultilevelTiling(policy.search_task, state, stage_id)) {
    // Do rfactor if we do not have enough parallelism on space iters
@@ -728,7 +728,7 @@ PopulationGenerationRule::ResultKind InitVectorization::Apply(SketchPolicyNode*
  int num_fusible = 0;
  while (num_fusible < static_cast<int>(stage->iters.size())) {
    int iter_id = static_cast<int>(stage->iters.size()) - 1 - num_fusible;
-    // Stop if this iterator has been a compute at attatch point
+    // Stop if this iterator has been a compute at attach point
    if ((*state)->attach_map->iter_to_attached_stages.count(std::make_pair(stage_id, iter_id))) {
      break;
    }
@@ -823,7 +823,7 @@ PopulationGenerationRule::ResultKind InitThreadBind::Apply(SketchPolicyNode* pol
      state->bind(stage_id, fused_it, IteratorAnnotation::kThreadX);
    } else {
      // Set threadIdx.x = default_warp_size by default.
-      // The later EvolutionarySearch will try more possiblity
+      // The later EvolutionarySearch will try more possibilities
      const auto& split_its = state->split(
          stage_id, fused_it, {Integer(policy->search_task->hardware_params->warp_size)});
      state->bind(stage_id, split_its[0], IteratorAnnotation::kBlockX);
@@ -910,7 +910,7 @@ PopulationGenerationRule::ResultKind InitThreadBind::Apply(SketchPolicyNode* pol
      // Fuse all iterators to do cooperative fetching
      Iterator fused = state->fuse(stage_id, (*state)->stages[stage_id]->iters);
      // Split out an extra iterator for vectorization
-      // The later EvolutionarySearch will try more possiblity
+      // The later EvolutionarySearch will try more possibilities
      const auto& iters0 = state->split(stage_id, fused, {Integer(1)});
      state->vectorize(stage_id, iters0[1]);
      // Follow split to keep a same thread extent with the root stage
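The InitThreadBind branch fixed above does, at the loop-state level, what this sketch shows: fuse the space iterators, split out a warp-sized inner loop, and bind the two halves (reusing the matmul names from the earlier sketch; on a real GPU target warp_size comes from task->hardware_params):

    state = dag.get_init_state()
    warp_size = 32                      # assumed; hardware_params->warp_size
    i, j, kk = state[C].iters
    fused = state.fuse(C, [i, j])       # fuse the space iterators only
    outer, inner = state.split(C, fused, [warp_size])
    state.bind(C, outer, "blockIdx.x")
    state.bind(C, inner, "threadIdx.x")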
diff --git a/src/auto_scheduler/search_policy/utils.cc b/src/auto_scheduler/search_policy/utils.cc
index a09ea59..62ffce4 100644
--- a/src/auto_scheduler/search_policy/utils.cc
+++ b/src/auto_scheduler/search_policy/utils.cc
@@ -350,15 +350,15 @@ const Array<Array<Integer>>& SplitFactorizationMemo::GetFactorizationSchemes(
  return *results_;
}

-void SplitFactorizationMemo::DfsEnumerate(int now, int remaining_lenght, int max_innermost_factor) {
+void SplitFactorizationMemo::DfsEnumerate(int now, int remaining_length, int max_innermost_factor) {
  if (now == n_lengths_) {
    if (tmp_stack_.back().as<IntImmNode>()->value <= max_innermost_factor) {
      results_->push_back(tmp_stack_);
    }
  } else {
-    for (const auto& f : GetFactors(remaining_lenght)) {
+    for (const auto& f : GetFactors(remaining_length)) {
      tmp_stack_.Set(now, Integer(f));
-      DfsEnumerate(now + 1, remaining_lenght / f, max_innermost_factor);
+      DfsEnumerate(now + 1, remaining_length / f, max_innermost_factor);
    }
  }
}
diff --git a/src/auto_scheduler/search_policy/utils.h b/src/auto_scheduler/search_policy/utils.h
index 792102a..d2ba128 100644
--- a/src/auto_scheduler/search_policy/utils.h
+++ b/src/auto_scheduler/search_policy/utils.h
@@ -302,7 +302,7 @@ inline int64_t GetExtent(const Iterator& it) {
}

/*! \brief Compute the product of lengths of all space iters and all reduce iters, respectively. */
-inline std::pair<int64_t, int64_t> GetCumulativeSpaceAndReductionLengh(const Stage& stage) {
+inline std::pair<int64_t, int64_t> GetCumulativeSpaceAndReductionLength(const Stage& stage) {
  int64_t cum_space_len = 1, cum_reduce_len = 1;
  for (const auto& iter : stage->iters) {
    if (iter->iter_kind == IteratorKind::kSpatial) {
@@ -321,7 +321,7 @@ inline bool NeedsRfactor(const SearchTask& task, const State& state, int stage_i
  // Compute the product of lengths of all space iters and all reduce iters
  int cum_space_len, cum_reduce_len;
  std::tie(cum_space_len, cum_reduce_len) =
-      GetCumulativeSpaceAndReductionLengh(state->stages[stage_id]);
+      GetCumulativeSpaceAndReductionLength(state->stages[stage_id]);

  if (NeedsMultilevelTiling(task, state, stage_id)) {
    // Do not use rfactor if we have enough parallelism on space iters
@@ -635,7 +635,7 @@ class SplitFactorizationMemo {
  const std::vector<int>& GetFactors(int n);

 private:
-  void DfsEnumerate(int now, int remaining_lenght, int max_innermost_factor);
+  void DfsEnumerate(int now, int remaining_length, int max_innermost_factor);

  std::unordered_map<QueryKey, Array<Array<Integer>>> memory_;
@@ -645,12 +645,12 @@ class SplitFactorizationMemo {
  std::unordered_map<int, std::vector<int>> factor_memory_;
};

-/*! \brief Get the indexes of SplitStep that processes on spatial iteratior. */
+/*! \brief Get the indexes of the SplitSteps that process spatial iterators. */
Array<Integer> GetSpatialSplitStepIds(const State& s, int stage_id);

// Apply multi-level tiling structure according to a string format,
-// where "S" stands a space level, "R" stands for a reudciton level.
-// For example, if the format is "SSRSRS", the we will
+// where "S" stands for a space level, "R" stands for a reduction level.
+// For example, if the format is "SSRSRS", then we will
// use tiling structure:  space_L0, space_L1, reduce_L0, space_L2, reduce_L1, space_L3
// For example, if apply "SSRSRS" to matrix multiplication,
// we have space iterators i and j, reduce iterator k.
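DfsEnumerate, renamed above, enumerates every way to factor an extent across the split levels. The same recursion in standalone form (the names here are mine, not TVM's):

    def factorization_schemes(extent, n_lengths, max_innermost_factor):
        # Mirrors SplitFactorizationMemo::DfsEnumerate: each level consumes
        # one divisor of the remaining length; the innermost factor is capped.
        results, stack = [], [1] * n_lengths

        def factors(n):
            return [f for f in range(1, n + 1) if n % f == 0]

        def dfs(now, remaining):
            if now == n_lengths:
                if stack[-1] <= max_innermost_factor:
                    results.append(stack.copy())
                return
            for f in factors(remaining):
                stack[now] = f
                dfs(now + 1, remaining // f)

        dfs(0, extent)
        return results

    print(factorization_schemes(8, 2, max_innermost_factor=4))
    # [[1, 1], [1, 2], [1, 4], [2, 1], [2, 2], [2, 4], [4, 1], [4, 2], [8, 1]]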
diff --git a/src/auto_scheduler/transform_step.cc b/src/auto_scheduler/transform_step.cc
index 2c3af0d..cec83bb 100755
--- a/src/auto_scheduler/transform_step.cc
+++ b/src/auto_scheduler/transform_step.cc
@@ -900,7 +900,7 @@ SplitStep::SplitStep(int stage_id, int iter_id, Optional<PrimExpr> extent,
                     const Array<Optional<Integer>>& lengths, bool inner_to_outer) {
  auto node = make_object<SplitStepNode>();
  node->stage_id = stage_id;
-  // Extent can be a unreducible expression in some special cases
+  // Extent can be an irreducible expression in some special cases
  if (extent && extent.value()->IsInstance<IntImmNode>()) {
    node->extent = tvm::Downcast<Integer>(extent.value());
  }
-- 
2.7.4