Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding ungrouped keyed gather #704

Merged
merged 1 commit into from
Oct 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,31 @@ BatchTLink<Iterator<Tuple<K, Iterator<V>>>, Tuple<K, Iterator<V>>> keyedGather(
*
* @return this TSet
*/
BatchTLink<Iterator<Tuple<K, Iterator<V>>>, Tuple<K, Iterator<V>>>
keyedGather(PartitionFunc<K> partitionFn, Comparator<K> comparator);
BatchTLink<Iterator<Tuple<K, Iterator<V>>>, Tuple<K, Iterator<V>>> keyedGather(
PartitionFunc<K> partitionFn, Comparator<K> comparator);

/**
* Gather by key ungrouped
*
* @return this TSet
*/
BatchTLink<Iterator<Tuple<K, V>>, Tuple<K, V>> keyedGatherUngrouped();

/**
* Gather by key ungrouped
*
* @return this TSet
*/
BatchTLink<Iterator<Tuple<K, V>>, Tuple<K, V>> keyedGatherUngrouped(PartitionFunc<K> partitionFn);

/**
* Gather by key. Sort the records with the key according to the comparator ungrouped
*
* @return this TSet
*/
BatchTLink<Iterator<Tuple<K, V>>, Tuple<K, V>> keyedGatherUngrouped(PartitionFunc<K> partitionFn,
Comparator<K> comparator);


/**
* Reduce by key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ public class KGatherExample extends BatchTsetExample {
private static final Logger LOG = Logger.getLogger(KGatherExample.class.getName());
private static final long serialVersionUID = -2753072757838198105L;

private <T> String iterToString(Iterator<T> iter) {
StringBuilder sb = new StringBuilder();
while (iter.hasNext()) {
sb.append(iter.next()).append(" ");
}
return sb.toString();
}

@Override
public void execute(BatchTSetEnvironment env) {
SourceTSet<Integer> src = dummySource(env, COUNT, PARALLELISM);
Expand All @@ -53,7 +61,7 @@ public void execute(BatchTSetEnvironment env) {

LOG.info("test foreach");
klink.forEach((ApplyFunc<Tuple<Integer, Iterator<Integer>>>)
data -> LOG.info("key " + data.getKey() + " " + data.getValue().toString())
data -> LOG.info(data.getKey() + " -> " + iterToString(data.getValue()))
);

LOG.info("test map");
Expand All @@ -63,7 +71,7 @@ public void execute(BatchTSetEnvironment env) {
while (input.getValue().hasNext()) {
s += input.getValue().next();
}
return "key " + input.getKey() + " " + s;
return input.getKey() + " -> " + s;
})
.direct()
.forEach(s -> LOG.info("map: " + s));
Expand All @@ -74,7 +82,8 @@ public void execute(BatchTSetEnvironment env) {
StringBuilder s = new StringBuilder();
while (input.hasNext()) {
Tuple<Integer, Iterator<Integer>> next = input.next();
s.append("$").append(next.getKey()).append("_").append(next.getValue().toString());
s.append(" [").append(next.getKey()).append(" -> ")
.append(iterToString(next.getValue())).append("] ");
}
return s.toString();
})
Expand All @@ -86,7 +95,7 @@ public void execute(BatchTSetEnvironment env) {
(input, output) -> {
while (input.hasNext()) {
Tuple<Integer, Iterator<Integer>> next = input.next();
output.collect(next.getKey() + "#" + next.getValue().toString());
output.collect(next.getKey() + " -> " + iterToString(next.getValue()));
}
})
.direct()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package edu.iu.dsc.tws.examples.tset.batch;

import java.util.HashMap;
import java.util.Iterator;
import java.util.logging.Logger;

import edu.iu.dsc.tws.api.JobConfig;
import edu.iu.dsc.tws.api.comms.structs.Tuple;
import edu.iu.dsc.tws.api.config.Config;
import edu.iu.dsc.tws.api.tset.fn.ApplyFunc;
import edu.iu.dsc.tws.api.tset.fn.ComputeCollectorFunc;
import edu.iu.dsc.tws.api.tset.fn.ComputeFunc;
import edu.iu.dsc.tws.api.tset.fn.MapFunc;
import edu.iu.dsc.tws.rsched.core.ResourceAllocator;
import edu.iu.dsc.tws.tset.env.BatchTSetEnvironment;
import edu.iu.dsc.tws.tset.links.batch.KeyedGatherUngroupedTLink;
import edu.iu.dsc.tws.tset.sets.batch.SourceTSet;

public class KGatherUngroupedExample extends BatchTsetExample {
private static final Logger LOG = Logger.getLogger(KGatherUngroupedExample.class.getName());
private static final long serialVersionUID = -2753072757838198105L;

@Override
public void execute(BatchTSetEnvironment env) {
SourceTSet<Integer> src = dummySource(env, COUNT, PARALLELISM);

KeyedGatherUngroupedTLink<Integer, Integer> klink = src.mapToTuple(i -> new Tuple<>(i % 4, i))
.keyedGatherUngrouped();

LOG.info("test foreach");
klink.forEach((ApplyFunc<Tuple<Integer, Integer>>)
data -> LOG.info(data.getKey() + " -> " + data.getValue())
);

LOG.info("test map");
klink.map((MapFunc<String, Tuple<Integer, Integer>>)
input -> input.getKey() + " -> " + input.getValue())
.direct()
.forEach(s -> LOG.info("map: " + s));

LOG.info("test compute");
klink.compute((ComputeFunc<String, Iterator<Tuple<Integer, Integer>>>)
input -> {
StringBuilder sb = new StringBuilder();
while (input.hasNext()) {
Tuple<Integer, Integer> next = input.next();
sb.append("[").append(next.getKey()).append("->").append(next.getValue()).append("]");
}
return sb.toString();
})
.direct()
.forEach(s -> LOG.info("compute: " + s));

LOG.info("test computec");
klink.compute((ComputeCollectorFunc<String, Iterator<Tuple<Integer, Integer>>>)
(input, output) -> {
while (input.hasNext()) {
Tuple<Integer, Integer> next = input.next();
output.collect(next.getKey() + " -> " + next.getValue() * 2);
}
})
.direct()
.forEach(s -> LOG.info("computec: " + s));
}


public static void main(String[] args) {
Config config = ResourceAllocator.loadConfig(new HashMap<>());

JobConfig jobConfig = new JobConfig();
BatchTsetExample.submitJob(config, PARALLELISM, jobConfig,
KGatherUngroupedExample.class.getName());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,14 @@
import java.util.Iterator;
import java.util.logging.Logger;

import edu.iu.dsc.tws.api.comms.CommunicationContext;
import edu.iu.dsc.tws.api.comms.structs.Tuple;
import edu.iu.dsc.tws.api.compute.OperationNames;
import edu.iu.dsc.tws.api.compute.graph.Edge;
import edu.iu.dsc.tws.api.tset.fn.PartitionFunc;
import edu.iu.dsc.tws.tset.env.BatchTSetEnvironment;
import edu.iu.dsc.tws.tset.sets.batch.CachedTSet;

public class KeyedGatherTLink<K, V> extends BatchIteratorLink<Tuple<K, Iterator<V>>> {
public class KeyedGatherTLink<K, V> extends KeyedGatherUngroupedTLink<K, Iterator<V>> {
private static final Logger LOG = Logger.getLogger(KeyedGatherTLink.class.getName());

private PartitionFunc<K> partitionFunction;

private boolean sortByKey;

private Comparator<K> keyCompartor;

public KeyedGatherTLink(BatchTSetEnvironment tSetEnv, int sourceParallelism) {
this(tSetEnv, null, sourceParallelism, false, null);
}
Expand All @@ -43,41 +34,22 @@ public KeyedGatherTLink(BatchTSetEnvironment tSetEnv, PartitionFunc<K> partition

public KeyedGatherTLink(BatchTSetEnvironment tSetEnv, PartitionFunc<K> partitionFn,
int sourceParallelism, boolean sortByKey, Comparator<K> keyCompartor) {
super(tSetEnv, "kgather", sourceParallelism);
this.partitionFunction = partitionFn;
this.sortByKey = sortByKey;
this.keyCompartor = keyCompartor;
if (sortByKey && keyCompartor == null) {
LOG.info("Key comparator cannot be null when sorting is true");
}
}

@Override
public Edge getEdge() {
Edge e = new Edge(getId(), OperationNames.KEYED_GATHER, getMessageType());
e.setKeyed(true);
e.setPartitioner(partitionFunction);
e.addProperty(CommunicationContext.SORT_BY_KEY, this.sortByKey);

if (this.keyCompartor != null) {
e.addProperty(CommunicationContext.KEY_COMPARATOR, this.keyCompartor);
}
return e;
super(tSetEnv, partitionFn, sourceParallelism, sortByKey, keyCompartor);
this.enableGroupByKey();
}

@Override
public KeyedGatherTLink<K, V> setName(String n) {
rename(n);
return this;
return (KeyedGatherTLink<K, V>) super.setName(n);
}

@Override
public CachedTSet<Tuple<K, Iterator<V>>> lazyCache() {
return (CachedTSet<Tuple<K, Iterator<V>>>) super.lazyCache();
return super.lazyCache();
}

@Override
public CachedTSet<Tuple<K, Iterator<V>>> cache() {
return (CachedTSet<Tuple<K, Iterator<V>>>) super.cache();
return super.cache();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package edu.iu.dsc.tws.tset.links.batch;

import java.util.Comparator;
import java.util.logging.Logger;

import edu.iu.dsc.tws.api.comms.CommunicationContext;
import edu.iu.dsc.tws.api.comms.structs.Tuple;
import edu.iu.dsc.tws.api.compute.OperationNames;
import edu.iu.dsc.tws.api.compute.graph.Edge;
import edu.iu.dsc.tws.api.tset.fn.PartitionFunc;
import edu.iu.dsc.tws.tset.env.BatchTSetEnvironment;
import edu.iu.dsc.tws.tset.sets.batch.CachedTSet;

public class KeyedGatherUngroupedTLink<K, V> extends BatchIteratorLink<Tuple<K, V>> {
private static final Logger LOG = Logger.getLogger(KeyedGatherUngroupedTLink.class.getName());

private PartitionFunc<K> partitionFunction;
private Comparator<K> keyCompartor;

private boolean sortByKey;
private boolean groupByKey = false;

public KeyedGatherUngroupedTLink(BatchTSetEnvironment tSetEnv, int sourceParallelism) {
this(tSetEnv, null, sourceParallelism, false, null);
}

public KeyedGatherUngroupedTLink(BatchTSetEnvironment tSetEnv, PartitionFunc<K> partitionFn,
int sourceParallelism) {
this(tSetEnv, partitionFn, sourceParallelism, false, null);
}

public KeyedGatherUngroupedTLink(BatchTSetEnvironment tSetEnv, PartitionFunc<K> partitionFn,
int sourceParallelism, boolean sortByKey,
Comparator<K> keyCompartor) {
super(tSetEnv, "kgather", sourceParallelism);
this.partitionFunction = partitionFn;
this.sortByKey = sortByKey;
this.keyCompartor = keyCompartor;
if (sortByKey && keyCompartor == null) {
LOG.warning("Key comparator cannot be null when sorting is true");
}
}

@Override
public Edge getEdge() {
Edge e = new Edge(getId(), OperationNames.KEYED_GATHER, getMessageType());
e.setKeyed(true);
e.setPartitioner(partitionFunction);
e.addProperty(CommunicationContext.SORT_BY_KEY, this.sortByKey);
e.addProperty(CommunicationContext.GROUP_BY_KEY, this.groupByKey);

if (this.keyCompartor != null) {
e.addProperty(CommunicationContext.KEY_COMPARATOR, this.keyCompartor);
}
return e;
}

void enableGroupByKey() {
this.groupByKey = true;
}

@Override
public KeyedGatherUngroupedTLink<K, V> setName(String n) {
rename(n);
return this;
}

@Override
public CachedTSet<Tuple<K, V>> lazyCache() {
return (CachedTSet<Tuple<K, V>>) super.lazyCache();
}

@Override
public CachedTSet<Tuple<K, V>> cache() {
return (CachedTSet<Tuple<K, V>>) super.cache();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import edu.iu.dsc.tws.tset.links.batch.JoinTLink;
import edu.iu.dsc.tws.tset.links.batch.KeyedDirectTLink;
import edu.iu.dsc.tws.tset.links.batch.KeyedGatherTLink;
import edu.iu.dsc.tws.tset.links.batch.KeyedGatherUngroupedTLink;
import edu.iu.dsc.tws.tset.links.batch.KeyedPartitionTLink;
import edu.iu.dsc.tws.tset.links.batch.KeyedReduceTLink;
import edu.iu.dsc.tws.tset.sets.BaseTSet;
Expand Down Expand Up @@ -93,6 +94,31 @@ public KeyedGatherTLink<K, V> keyedGather(PartitionFunc<K> partitionFn,
return gather;
}

@Override
public KeyedGatherUngroupedTLink<K, V> keyedGatherUngrouped() {
KeyedGatherUngroupedTLink<K, V> gather = new KeyedGatherUngroupedTLink<>(getTSetEnv(),
getParallelism());
addChildToGraph(gather);
return gather;
}

@Override
public KeyedGatherUngroupedTLink<K, V> keyedGatherUngrouped(PartitionFunc<K> partitionFn) {
KeyedGatherUngroupedTLink<K, V> gather = new KeyedGatherUngroupedTLink<>(getTSetEnv(),
partitionFn, getParallelism());
addChildToGraph(gather);
return gather;
}

@Override
public KeyedGatherUngroupedTLink<K, V> keyedGatherUngrouped(PartitionFunc<K> partitionFn,
Comparator<K> comparator) {
KeyedGatherUngroupedTLink<K, V> gather = new KeyedGatherUngroupedTLink<>(getTSetEnv(),
partitionFn, getParallelism(), true, comparator);
addChildToGraph(gather);
return gather;
}

@Override
public <VR> JoinTLink<K, V, VR> join(BatchTupleTSet<K, VR> rightTSet,
CommunicationContext.JoinType type,
Expand All @@ -104,7 +130,6 @@ public <VR> JoinTLink<K, V, VR> join(BatchTupleTSet<K, VR> rightTSet,
} else {
join = new JoinTLink<>(getTSetEnv(), type, keyComparator, this, rightTSet);
}

addChildToGraph(join);

// add the right tset connection
Expand Down
Loading