proposal: x/exp/xiter: new package with iterator adapters #61898

rsc · 2023-08-09T16:02:49Z

We propose to add a new package golang.org/x/exp/xiter that defines adapters on iterators. Perhaps these would one day be moved to the iter package or perhaps not. There are concerns about how these would affect idiomatic Go code. It seems worth defining them in x/exp to help that discussion along, and then we can decide whether they move anywhere else when we have more experience with them.

The package is called xiter to avoid a collision with the standard library iter (see proposal #61897). An alternative would be to have xiter define wrappers and type aliases for all the functions and types in the standard iter package, but the type aliases would depend on #46477, which is not yet implemented.

This is one of a collection of proposals updating the standard library for the new 'range over function' feature (#61405). It would only be accepted if that proposal is accepted. See #61897 for a list of related proposals.

/*
Package xiter implements basic adapters for composing iterator sequences:

[Concat] and [Concat2] concatenate sequences.
[Equal], [Equal2], [EqualFunc], and [EqualFunc2] check whether two sequences contain equal values.
[Filter] and [Filter2] filter a sequence according to a function f.
[Limit] and [Limit2] truncate a sequence after n items.
[Map] and [Map2] apply a function f to a sequence.
[Merge], [Merge2], [MergeFunc], and [MergeFunc2] merge two ordered sequences.
[Reduce] and [Reduce2] combine the values in a sequence.
[Zip] and [Zip2] iterate over two sequences in parallel.

*/
package xiter

Ideally we would define these:

type Seq[V any] = iter.Seq[V]
type Seq2[K, V any] = iter.Seq2[K, V]

but we may not be able to. If not, the definitions below would refer to iter.Seq and iter.Seq2 instead of Seq and Seq2.

// Concat returns a single iterator that runs each of the given sequences
// in argument order, handing every value to the same yield function.
// Iteration ends, reporting false, at the first sequence that itself
// reports false; otherwise it reports true after the last sequence.
func Concat[V any](seqs ...Seq[V]) Seq[V] {
	return func(yield func(V) bool) bool {
		for i := range seqs {
			if finished := seqs[i](yield); !finished {
				return false
			}
		}
		return true
	}
}

// Concat2 returns an iterator over the concatenation of the key-value
// sequences: every pair of the first sequence, then every pair of the
// second, and so on, in argument order.
//
// The returned iterator stops and reports false at the first sequence
// that reports false; it reports true once all sequences have run.
// With no sequences it yields nothing and reports true.
func Concat2[K, V any](seqs ...Seq2[K, V]) Seq2[K, V] {
	return func(yield func(K, V) bool) bool {
		for _, seq := range seqs {
			if !seq(yield) {
				return false
			}
		}
		return true
	}
}

// Equal reports whether x and y yield the same values in the same order.
// It walks both sequences in lockstep with Zip and returns false at the
// first position where only one sequence has a value or the two values
// differ.
func Equal[V comparable](x, y Seq[V]) bool {
	for pair := range Zip(x, y) {
		switch {
		case pair.Ok1 != pair.Ok2:
			// One sequence ended before the other.
			return false
		case pair.V1 != pair.V2:
			return false
		}
	}
	return true
}

// Equal2 reports whether x and y yield the same key-value pairs in the
// same order. It walks both sequences in lockstep with Zip2 and returns
// false at the first position where only one sequence has a pair, or
// where the keys or the values differ.
func Equal2[K, V comparable](x, y Seq2[K, V]) bool {
	for pair := range Zip2(x, y) {
		same := pair.Ok1 == pair.Ok2 && pair.K1 == pair.K2 && pair.V1 == pair.V2
		if !same {
			return false
		}
	}
	return true
}

// EqualFunc reports whether x and y are equal, using f to compare the
// values at each position. It returns false at the first position where
// only one sequence has a value or where f reports false; f is not
// called for a position at which one of the sequences has ended.
func EqualFunc[V1, V2 any](x Seq[V1], y Seq[V2], f func(V1, V2) bool) bool {
	for pair := range Zip(x, y) {
		if pair.Ok1 != pair.Ok2 {
			return false
		}
		if !f(pair.V1, pair.V2) {
			return false
		}
	}
	return true
}

// EqualFunc2 reports whether the two sequences are equal according to the function f.
func EqualFunc[K1, V1, K2, V2 any](x Seq[K1, V1], y Seq[K2, V2], f func(K1, V1, K2, V2) bool) bool {
	for z := range Zip(x, y) {
		if z.Ok1 != z.Ok2 || !f(z.K1, z.V1, z.K2, z.V2) {
			return false
		}
	}
	return true
}

// Filter returns an iterator over seq that only includes
// the values v for which f(v) is true.
//
// f is called once for every value produced by seq; yield is called only
// for the values that f accepts. The iterator reports false if yield
// asked to stop and true otherwise.
func Filter[V any](f func(V) bool, seq Seq[V]) Seq[V] {
	return func(yield func(V) bool) bool {
		for v := range seq {
			if f(v) && !yield(v) {
				return false
			}
		}
		return true
	}
}

// Filter2 returns an iterator over seq that only includes
// the pairs k, v for which f(k, v) is true.
//
// f is called once for every pair produced by seq; yield is called only
// for the pairs that f accepts. The iterator reports false if yield
// asked to stop and true otherwise.
func Filter2[K, V any](f func(K, V) bool, seq Seq2[K, V]) Seq2[K, V] {
	return func(yield func(K, V) bool) bool {
		for k, v := range seq {
			if f(k, v) && !yield(k, v) {
				return false
			}
		}
		return true
	}
}

// Limit returns an iterator over seq that stops after n values.
// If n <= 0 the iterator yields nothing.
//
// The returned iterator may be used more than once: each use yields up
// to n values again.
func Limit[V any](seq Seq[V], n int) Seq[V] {
	return func(yield func(V) bool) bool {
		if n <= 0 {
			return true
		}
		// Count down a per-call copy. Decrementing the captured n would
		// leave it at zero after the first use, so every later use of the
		// same iterator would yield nothing.
		remaining := n
		for v := range seq {
			if !yield(v) {
				return false
			}
			if remaining--; remaining <= 0 {
				break
			}
		}
		return true
	}
}

// Limit2 returns an iterator over seq that stops after n key-value pairs.
// If n <= 0 the iterator yields nothing.
//
// The returned iterator may be used more than once: each use yields up
// to n pairs again.
func Limit2[K, V any](seq Seq2[K, V], n int) Seq2[K, V] {
	return func(yield func(K, V) bool) bool {
		if n <= 0 {
			return true
		}
		// Count down a per-call copy. Decrementing the captured n would
		// leave it at zero after the first use, so every later use of the
		// same iterator would yield nothing.
		remaining := n
		for k, v := range seq {
			if !yield(k, v) {
				return false
			}
			if remaining--; remaining <= 0 {
				break
			}
		}
		return true
	}
}

// Map returns an iterator that yields f(v) for each value v of seq, in
// the order seq produces them. f is applied only as values are consumed;
// the iterator reports false as soon as yield asks to stop.
func Map[In, Out any](f func(In) Out, seq Seq[In]) Seq[Out] {
	return func(yield func(Out) bool) bool {
		for item := range seq {
			out := f(item)
			if keepGoing := yield(out); !keepGoing {
				return false
			}
		}
		return true
	}
}

// Map2 returns an iterator over f applied to seq.
func Map[KIn, VIn, KOut, VOut any](f func(KIn, VIn) (KOut, VOut), seq Seq[KIn, VIn]) Seq[KOut, VOut] {
	return func(yield func(KOut, VOut) bool) bool {
		for k, v := range seq {
			if !yield(f(k, v)) {
				return false
			}
		}
		return true
	}
}

// Merge combines two sequences of ordered values into one.
// Each occurrence of a value in x and each occurrence in y appears
// exactly once in the output, so no value is dropped or duplicated.
// If x and y are each ordered, so is the output; if they are not,
// the output is not ordered either, but it still contains every
// occurrence from both inputs.
//
// Merge is MergeFunc with cmp.Compare[V] as the ordering function.
func Merge[V cmp.Ordered](x, y Seq[V]) Seq[V] {
	order := cmp.Compare[V]
	return MergeFunc(x, y, order)
}

// MergeFunc merges two sequences of values ordered by the function f.
// Values appear in the output once for each time they appear in x
// and once for each time they appear in y.
// When equal values appear in both sequences,
// the output contains the values from x before the values from y.
// If the two input sequences are not ordered by f,
// the output sequence will not be ordered by f,
// but it will still contain every value from x and y exactly once.
//
// f(a, b) must return a positive number when a orders after b.
// The returned iterator reports false if yield asked to stop and true
// once both inputs are exhausted.
func MergeFunc[V any](x, y Seq[V], f func(V, V) int) Seq[V] {
	return func(yield func(V) bool) bool {
		// x is ranged over directly; y is pulled one value at a time so
		// the two can be interleaved.
		next, stop := Pull(y)
		defer stop()
		v2, ok2 := next()
		for v1 := range x {
			// Emit every pending y value that orders strictly before v1.
			// Ties are not emitted here, so x's value goes first.
			for ok2 && f(v1, v2) > 0 {
				if !yield(v2) {
					return false
				}
				v2, ok2 = next()
			}
			if !yield(v1) {
				return false
			}
		}
		// x is exhausted; drain whatever is left of y.
		for ok2 {
			if !yield(v2) {
				return false
			}
			v2, ok2 = next()
		}
		return true
	}
}

// Merge2 merges two sequences of key-value pairs ordered by their keys.
// Pairs appear in the output once for each time they appear in x
// and once for each time they appear in y.
// If the two input sequences are not ordered by their keys,
// the output sequence will not be ordered by its keys,
// but it will still contain every pair from x and y exactly once.
//
// Merge2 is equivalent to calling MergeFunc2 with cmp.Compare[K]
// as the ordering function.
func Merge2[K cmp.Ordered, V any](x, y Seq2[K, V]) Seq2[K, V] {
	return MergeFunc2(x, y, cmp.Compare[K])
}

// MergeFunc2 merges two sequences of key-value pairs ordered by the function f.
// Pairs appear in the output once for each time they appear in x
// and once for each time they appear in y.
// When pairs with equal keys appear in both sequences,
// the output contains the pairs from x before the pairs from y.
// If the two input sequences are not ordered by f,
// the output sequence will not be ordered by f,
// but it will still contain every pair from x and y exactly once.
//
// f compares keys only; f(a, b) must return a positive number when a
// orders after b. The returned iterator reports false if yield asked to
// stop and true once both inputs are exhausted.
func MergeFunc2[K, V any](x, y Seq2[K, V], f func(K, K) int) Seq2[K, V] {
	return func(yield func(K, V) bool) bool {
		// x is ranged over directly; y is pulled one pair at a time so
		// the two can be interleaved.
		next, stop := Pull2(y)
		defer stop()
		k2, v2, ok2 := next()
		for k1, v1 := range x {
			// Emit every pending y pair whose key orders strictly before
			// k1. Ties are not emitted here, so x's pair goes first.
			for ok2 && f(k1, k2) > 0 {
				if !yield(k2, v2) {
					return false
				}
				k2, v2, ok2 = next()
			}
			if !yield(k1, v1) {
				return false
			}
		}
		// x is exhausted; drain whatever is left of y.
		for ok2 {
			if !yield(k2, v2) {
				return false
			}
			k2, v2, ok2 = next()
		}
		return true
	}
}

// Reduce combines the values in seq using f.
// For each value v in seq, it updates sum = f(sum, v)
// and then returns the final sum.
// For example, if iterating over seq yields v1, v2, v3,
// Reduce returns f(f(f(sum, v1), v2), v3).
// If seq yields no values, Reduce returns sum unchanged.
func Reduce[Sum, V any](sum Sum, f func(Sum, V) Sum, seq Seq[V]) Sum {
	// Seq[V] produces one value per iteration, so the range clause
	// takes a single variable.
	for v := range seq {
		sum = f(sum, v)
	}
	return sum
}

// Reduce2 folds the key-value pairs of seq into a single result.
// Starting from sum, it replaces the running result with
// f(result, k, v) for each pair k, v in iteration order and returns
// the last result.
// For example, if iterating over seq yields (k1, v1), (k2, v2), (k3, v3),
// Reduce2 returns f(f(f(sum, k1, v1), k2, v2), k3, v3).
func Reduce2[Sum, K, V any](sum Sum, f func(Sum, K, V) Sum, seq Seq2[K, V]) Sum {
	acc := sum
	for key, val := range seq {
		acc = f(acc, key, val)
	}
	return acc
}

// A Zipped is a pair of zipped values, one of which may be missing,
// drawn from two different sequences.
//
// Ok1 and Ok2 record which of the two values are present; a value that
// is not present is left as the zero value of its type.
type Zipped[V1, V2 any] struct {
	// V1 is the value drawn from the first sequence.
	V1  V1
	Ok1 bool // whether V1 is present (if not, it will be zero)
	// V2 is the value drawn from the second sequence.
	V2  V2
	Ok2 bool // whether V2 is present (if not, it will be zero)
}

// Zip returns an iterator that iterates x and y in parallel,
// yielding Zipped values of successive elements of x and y.
// If one sequence ends before the other, the iteration continues
// with Zipped values in which either Ok1 or Ok2 is false,
// depending on which sequence ended first.
//
// Zip is a useful building block for adapters that process
// pairs of sequences. For example, Equal can be defined as:
//
//	func Equal[V comparable](x, y Seq[V]) bool {
//		for z := range Zip(x, y) {
//			if z.Ok1 != z.Ok2 || z.V1 != z.V2 {
//				return false
//			}
//		}
//		return true
//	}
func Zip[V1, V2 any](x Seq[V1], y Seq[V2]) Seq[Zipped[V1, V2]] {
	return func(yield func(z Zipped[V1, V2]) bool) bool {
		// x is ranged over directly; y is pulled one value at a time so
		// the two can advance together.
		next, stop := Pull(y)
		defer stop()
		v2, ok2 := next()
		for v1 := range x {
			// ok2 may already be false here if y ended first; this relies
			// on next reporting a zero value and false once y is exhausted.
			if !yield(Zipped[V1, V2]{v1, true, v2, ok2}) {
				return false
			}
			v2, ok2 = next()
		}
		// x is exhausted; report the rest of y against a zero V1.
		var zv1 V1
		for ok2 {
			if !yield(Zipped[V1, V2]{zv1, false, v2, ok2}) {
				return false
			}
			v2, ok2 = next()
		}
		return true
	}
}

// A Zipped2 is a pair of zipped key-value pairs,
// one of which may be missing, drawn from two different sequences.
//
// Ok1 and Ok2 record which of the two pairs are present; the key and
// value of a pair that is not present are left as zero values.
type Zipped2[K1, V1, K2, V2 any] struct {
	// K1 and V1 are the key and value drawn from the first sequence.
	K1  K1
	V1  V1
	Ok1 bool // whether K1, V1 are present (if not, they will be zero)
	// K2 and V2 are the key and value drawn from the second sequence.
	K2  K2
	V2  V2
	Ok2 bool // whether K2, V2 are present (if not, they will be zero)
}

// Zip2 returns an iterator that iterates x and y in parallel,
// yielding Zipped2 values of successive elements of x and y.
// If one sequence ends before the other, the iteration continues
// with Zipped2 values in which either Ok1 or Ok2 is false,
// depending on which sequence ended first.
//
// Zip2 is a useful building block for adapters that process
// pairs of sequences. For example, Equal2 can be defined as:
//
//	func Equal2[K, V comparable](x, y Seq2[K, V]) bool {
//		for z := range Zip2(x, y) {
//			if z.Ok1 != z.Ok2 || z.K1 != z.K2 || z.V1 != z.V2 {
//				return false
//			}
//		}
//		return true
//	}
func Zip2[K1, V1, K2, V2 any](x Seq[K1, V1], y Seq2[K2, V2]) Seq[Zipped2[K1, V1, K2, V2]] {
	return func(yield func(z Zipped2[K1, V1, K2, V2]) bool) bool {
		next, stop := Pull2(y)
		defer stop()
		k2, v2, ok2 := next()
		for k1, v1 := range x {
			if !yield(Zipped2[K1, V1, K2, V2]{k1, v1, true, k2, v2, ok2}) {
				return false
			}
			k2, v2, ok2 = next()
		}
		var zk1 K1
		var zv1 V1
		for ok2 {
			if !yield(Zipped2[V1, V2]{zk1, zv1, false, v2, ok2}) {
				return false
			}
			k2, v2, ok2 = next()
		}
	}
	return true
}

The text was updated successfully, but these errors were encountered:

gophun · 2023-08-09T17:23:01Z

The duplication of each function is the first thing that catches the eye. Are there thoughts on why this is acceptable?

gophun · 2023-08-09T17:39:44Z

What about an adapter that converts an iter.Seq[V] to an iter.Seq2[int, V] and an adapter that converts an iter.Seq2[K, V] to an iter.Seq[V]?

zephyrtronium · 2023-08-09T17:58:55Z

Some typos: EqualFunc2, Map2, Merge2, and MergeFunc2 lack the 2 suffixes on their actual names. They're all correct in the corresponding documentation.

earthboundkid · 2023-08-09T20:05:17Z

May I humbly suggest that the name "iterutils" is less susceptible to, uh, unfortunate mispronunciation.

earthboundkid · 2023-08-09T20:09:11Z

For Reduce, the callback should go last: func Reduce[Sum, V any](sum Sum, seq Seq[V], f func(Sum, V) Sum) Sum.

DeedleFake · 2023-08-09T20:41:23Z

For Reduce, the callback should go last: func Reduce[Sum, V any](sum Sum, seq Seq[V], f func(Sum, V) Sum) Sum.

I'd actually prefer func Reduce[Sum, V any](seq Seq[V], sum Sum, f func(Sum, V) Sum) Sum.

Edit: I just realized that if Reduce() is being used to build an array, putting sum first puts everything in the same order as Append() and other functions that put the destination first. I'm not sure if that's worth it or not.

rsc · 2023-08-09T21:41:23Z

This proposal has been added to the active column of the proposals project
and will now be reviewed at the weekly proposal review meetings.
— rsc for the proposal review group

DeedleFake · 2023-08-10T00:47:52Z

The more I think about it, the more that I think that API design for this should wait until after a decision is made on #49085. Multiple other languages have proven over and over that a left-to-right chained syntax is vastly superior ergonomically to simple top-level functions for iterators. For example, compare

nonNegative := xiter.Filter(
  xiter.Map(
    bufio.Lines(r),
    parseLine,
  ),
  func(v int) bool { return v >= 0 },
)

vs.

nonNegative := bufio.Lines(r).
  Map(parseLine).
  Filter(func(v int) bool { return v >= 0 })

Go's a little weird because of the need to put the . on the previous line, but other than that one oddity, which I could get used to, the second is better in every way. It reads in the order that actions actually happen, it's less repetitive, etc. The only real way to emulate it currently is something like

lines := bufio.Lines(r)
intlines := xiter.Map(lines, parseLine)
// Each step names the previous step's iterator as its input.
nonNegative := xiter.Filter(intlines, func(v int) bool { return v >= 0 })

That works, but it clutters up the local namespace and it's significantly harder to edit. For example, if you decide you need to add a new step in the chain, you have to make sure that all of the variables for each iterator match up in the previous and succeeding calls.

ianlancetaylor · 2023-08-10T00:55:14Z

What type does bufio.Lines return to make that work in Go? What methods does that type support? What is the type of nonNegative? I mean these as honest questions. Can we write this kind of code in Go today, or would we need new language features?

hherman1 · 2023-08-10T01:50:04Z

You would probably have to wrap the base iterator like:

stream.New(bufio.Lines).
    Filter(…).
    …

DeedleFake · 2023-08-10T03:00:48Z

@ianlancetaylor

Sorry. I should have stuck a comment in. I was just coming up with some hypothetical function that would give an iter.Seq[string]. In this case, the idea was that it would internally use a bufio.Scanner to yield lines from an io.Reader or something. My original code had an anonymous func(string) int instead of the vague parseLine but I removed it because it was clogging up the example with irrelevant code and I didn't clarify when I did.

@hherman1

Not necessarily. The transformative and sink functions on iterators could just be defined as methods on iter.Seq.

hherman1 · 2023-08-10T03:09:24Z

~~But iter.Seq is an interface type no? Are you saying it should be a struct type?~~

I was wrong, it’s not an interface.

benhoyt · 2023-08-10T03:22:51Z

Why do some functions take the f func as the last parameter, but Filter and Map take it as the first, and Reduce in the middle? Most other functions in the stdlib take funcs as the last parameter, such as sort.Slice, slices.*Func, ServeMux.HandleFunc, and so on. This makes code that uses them with inline function literals more readable:

names := xiter.Map(func (p Person) string {
	return p.Name
}, people) // "people" gets lost

// vs

names := xiter.Map(people, func (p Person) string {
	return p.Name
})

Merovius · 2023-08-10T05:58:35Z

@DeedleFake There won't be a "decision" on #49085 anytime soon. There are good reasons not to do it yet, but we also don't want to say it never happens. The issue exists to reflect that state. What it comes down to is, would you rather have no iterators (for the foreseeable future) or ones which can't be "chained"?

DeedleFake · 2023-08-10T06:04:09Z

What it comes down to is, would you rather have no iterators (for the foreseeable future) or ones which can't be "chained"?

No iterators, definitely. I've done fine without them for over a decade. I can wait a bit longer. If a bad implementation goes in, I'll never get a good version. Plus, I can just write my own implementation of whatever iterator functions I need as long as range-over-func exists while I wait.

gophun · 2023-08-10T06:47:46Z

Neither chaining nor functional programming has ever been a decisive or recommended technique in Go. Instead, iteration—specifically, procedural 'for' loops—has always been a core technique since the language's inception. The iterator proposals aim to enhance this core approach. While I don't know what the overall plans are, if you're hoping for Go to follow the path of Java Streams or C# LINQ, you might be in for disappointment.

Merovius · 2023-08-10T10:02:02Z

I can wait a bit longer. If a bad implementation goes in, I'll never get a good version.

I think "a bit" is misleading. We are talking years - if at all. And I don't believe the second part of that sentence is true either, we could always release a v2 of the relevant packages, if we ever manage to do #49085 in a decade or so.

DeedleFake · 2023-08-10T13:54:52Z

While I don't know what the overall plans are, if you're hoping for Go to follow the path of Java Streams or C# LINQ, you might be in for disappointment.

Is that not the intention of these proposals? To build a standardized iterator system that works similarly to those? Why else is there a proposal here for Map(), Filter(), and Reduce(), among others? I have no problem with functions like slices.Backwards() and other source function proposals. My only problem is the transformative and sink functions.

I think "a bit" is misleading. We are talking years - if at all. And I don't believe the second part of that sentence is true either, we could always release a v2 of the relevant packages, if we ever manage to do #49085 in a decade or so.

Edit: The way this proposal is phrased does actually imply that they may be heavily reevaluated enough in x/exp that they may not go into the standard library at all, so maybe my point is moot after all. I still think that this is a valid issue with the API design to bring up, but maybe it's a bit off-topic for this particular proposal and should wait until after they're in x/exp and it can be more easily demonstrated how awkward they are to use. I don't like the idea that existing code will be broken when some variant of them does potentially go into the standard library, but it's less of a problem than I was worried about. Never mind. Please ignore my rambling below.

That issue has only been open for 2 years. I think assuming that it'll take a decade to solve is a bit unfair. Yes, a v2 is an option, especially if #61716 is accepted, but that was created out of necessity to deal with problems in an existing package, while this would essentially be purposefully putting problems into a new package. It's not like I'm saying that iterators are unacceptable to me in this state, just that features have been delayed or cut before because of possible changes coming later and that I think that it's prudent to discuss the possibility here. That just happened in the last few weeks in the maps package because of the possibility of the acceptance of #61405. I think the same should be done with the transformative and sink functions for now, or at the very least those functions should be planned to stay in x/exp until some way to clean up the API is decided on, that's all.

One of my favorite things about Go is how slow and methodical it (usually) is in introducing new features. I think that the fact that it took over a decade to add generics is a good thing, and I really wanted generics. One of the purposes of that approach is to try to avoid having to fix it later. Adding those functions in the proposed manner will almost definitely necessitate that later fix, and I very much would like to avoid that if at all possible.

gophun · 2023-08-10T14:34:01Z

Is that not the intention of these proposals? To build a standardized iterator system that works similarly to those?

Java Streams and .NET LINQ build on a standardized iterator system, but they are more than that. Both languages had a generic iterator system before. Iterators are useful without chaining or functional programming.

Why else is there a proposal here for Map(), Filter(), and Reduce(), among others?

That would be this very proposal, and it comes with a caveat: "... or perhaps not. There are concerns about how these would affect idiomatic Go code. "

This means that not everyone who has read these proposals in advance believes that this part is a good idea.

jba · 2023-08-10T16:26:54Z

While I don't know what the overall plans are, if you're hoping for Go to follow the path of Java Streams or C# LINQ, you might be in for disappointment.

Is that not the intention of these proposals? To build a standardized iterator system that works similarly to those? Why else is there a proposal here for Map(), Filter(), and Reduce(), among others?

Maybe chaining leads to too much of a good thing. It becomes more tempting to write long, hard-to-read chains of functions. You're less likely to do that if you have to nest calls.

As an analogy, Go has if. Isn't the intention of if to allow conditional execution? Why then shouldn't Go have the ternary operator ?:? Because it often leads to hard-to-read code.

rsc · 2023-08-10T16:50:12Z

Re #49085, generic methods either require (A) dynamic code generation or (B) terrible speed or (C) hiding those methods from dynamic interface checks or (D) not doing them at all. We have chosen option (D). The issue remains open like so many suggestions people have made, but I don't see a realistic path forward where we choose A, B, or C, nor do I see a fifth option. So it makes sense to assume generic methods are not going to happen and do our work accordingly.

Merovius · 2023-08-10T17:36:48Z

@DeedleFake The issue is not lack of understanding what a lack of parameterized methods means. It's just that, as @rsc said, wanting them doesn't make them feasible. The issue only being 2 years old is deceptive. The underlying problem is actually as old as Go and one of the main reasons we didn't have generics for most of that. Which you should consider, when you say

I think that the fact that it took over a decade to add generics is a good thing, and I really wanted generics.

We got generics by committing to keep implementation strategies open, thus avoiding the generics dilemma. Not having parametric methods is a pretty direct consequence of that decision.

DeedleFake · 2023-08-10T17:59:35Z

Well, I tried. If that's the decision then that's the decision. I'm disappointed, but I guess I'll just be satisfied with what I do like about the current proposal, even if it has, in my opinion, some fairly major problems. Sorry for dragging this a bit off-topic there.

thediveo · 2023-08-10T20:18:19Z

Hope that it's not noise: I wondered if naming it the sum parameter might be implying to the less experienced dev that reduce does only summation, so I looked at Javascript's array reduce: that uses accumulator. I don't know if that is much better, I just wanted to point it out. If anything, let's have a good laugh.

jimmyfrasche · 2023-08-10T21:52:21Z

Those nonstandard Zip definitions look like they would occasionally be useful but I think I'd want the ordinary zip/zipLongest definitions most of the time. Those can be recovered from the proposed with some postprocessing but I'd hate to have to always do that.

These should be considered along with Limit:

LimitFunc - stop iterating after a predicate matches (often called TakeWhile in other languages)

Skip, SkipFunc - drop the first n items (or until the predicate matches) before yielding (opposite of Limit/LimitFunc, often called drop/dropWhile)

jba · 2023-08-10T22:56:14Z

Those nonstandard Zip definitions look like they would occasionally be useful but I think I'd want the ordinary zip/zipLongest definitions most of the time.

Can you explain the difference? Is it just that zip typically stops at the end of the shorter sequence? That is definitely less useful as a building block, and easy to write given these functions. What are some examples where stopping at the shortest is better?

jimmyfrasche · 2023-08-10T23:09:26Z

zip stops after the shorter sequence. zipLongest pads out the missing values of the shorter sequence with a specified value.

The provided ones are more general and can be used to build those but I can't really think of any time I've used zip where I needed to know that. I've always either known the lengths were equal by construction so it didn't matter or couldn't do anything other than drop the excess so it didn't matter. Maybe that's peculiar to me and the situations in which I reach for zip, but they've been defined like that in every language I can think of that I've used, which has to be some kind of indicator that I'm not alone in this.

I'm not arguing for them to be replaced with the less general more common versions: I want those versions here too so I can use them directly without having to write a shim to the standard definition.

earthboundkid · 2023-09-08T17:40:03Z

@jba How would your ParseIntegers look? ISTM it would have to read the entire sequence first, to return the error.

I think it would look like this:

func ParseIntegers(seq iter.Seq[string], errp *error) iter.Seq[int] {
  return func(yield func(int) bool) {
    for s := range seq {
      n, err := atoi.ParseInt(s)
      if err != nil {
        *errp = err
        return
      }
      if !yield(n) {
        return
      }
    }
  }
}

Merovius · 2023-09-08T17:47:09Z

Yes, that works. I wouldn't want to write or use that. I also don't think it is wholly unnatural to continue processing, even if a single datum is broken - I think it makes sense to skip/log broken data and continue with the rest. Like, imagine a batch-processing pipeline, taking a sequence of filenames, doing some processing on each, and mapping it to an aggregate and error.

But sure, if the consensus is that we just don't think people should do that kind of stuff and should always just stop iterating if any error is encountered, we wouldn't need MapErr. Personally, I find that a strange assumption.

gazerro · 2023-09-08T19:02:59Z

@Merovius In addition to your options, I would propose another one.

If we wanted to convert a function that operates on a value:

func(v T) (R, error)

into a function that operates on a sequence:

func(s iter.Seq[T]) iter.Seq2[R, error]

we could define a function as

type Func[T, R any] func(v T) (R, error)
type SeqFunc[T, R any] func(s iter.Seq[T]) iter.Seq2[R, error]

// AsSeq lifts fn, which converts one value and may fail, into a function
// that converts a whole sequence. The resulting sequence yields, for
// each input value in order, the result and error returned by fn; errors
// do not stop the iteration, only the consumer does.
func AsSeq[T, R any](fn Func[T, R]) SeqFunc[T, R] {
	return func(s iter.Seq[T]) iter.Seq2[R, error] {
		return func(yield func(R, error) bool) {
			for in := range s {
				if out, err := fn(in); !yield(out, err) {
					return
				}
			}
		}
	}
}

With this function, ParseIntegers could be written as

// ParseIntegers returns a sequence that pairs each string of s with the
// result of strconv.Atoi on it: the parsed integer and any parse error.
func ParseIntegers(s iter.Seq[string]) iter.Seq2[int, error] {
	parse := AsSeq(strconv.Atoi)
	return parse(s)
}

or even

ParseIntegers := AsSeq(strconv.Atoi)

jimmyfrasche · 2023-09-08T19:21:11Z

Map12 seems entirely reasonable and generally useful. So does Map21, for that matter (Keys and Values can be implemented in terms of it).

Add

// First stops iteration and sets err on the first err in s
func First[K any](err *error, s iter.Seq2[K, error]) iter.Seq[K]

And you can do

var err error
for s := range xiter.First(&err, xiter.Map12(seq, strconv.Atoi)) {

ianlancetaylor · 2023-09-08T19:47:13Z

I also don't think it is wholly unnatural to continue processing, even if a single datum is broken - I think it makes sense to skip/log broken data and continue with the rest. Like, imagine a batch-processing pipeline, taking a sequence of filenames, doing some processing on each, and mapping it to an aggregate and error.

If we want to log errors, then my first inclination would be to change the function we are passing in. Instead of passing strconv.Atoi pass

    func(s string) int {
        r, err := strconv.Atoi(s)
        if err != nil {
            log.Errorf("bad number %s: %v", s, err)
        }
        return r
    }

If we think that will happen with some frequency then

// LogErrors wraps f, which may fail, in a function that cannot: the
// wrapper calls f, logs any error together with the input value, and
// returns f's result either way. When f fails, the result is whatever
// value f returned alongside the error.
func LogErrors[T1, T2 any](f func(T1) (T2, error)) func(T1) T2 {
    return func(v T1) T2 {
        r, err := f(v)
        if err != nil {
            log.Errorf("failure on %v: %v", v, err)
        }
        return r
    }
}

Or, of course,

func LogAndSkipErrors[T1, T2 any](it iter.Seq[T1], f func(T1) (T2, error)) iter.Seq[T2] {
    return func(yield func(T2) bool) {
        for v := range it {
            r, err := f(v)
            if err != nil {
                log.Errorf("failure on %v: %v", v, err)
                continue
            }
            if !yield(r) {
                return
            }
        }
    }
}

My general point is that we don't have to focus on "a sequence of values with errors". We can focus on "use a function to convert one value to another, while handling errors in some way".

Merovius · 2023-09-08T19:57:09Z

FWIW none of these other options really changes my point about this being awkward and requiring boilerplate.

Anyways, I just thought I should bring it up. To me, the case of having a mapping-function that returns an error seems fairly common and having that return a Seq2[T, error] seems ultimately the more flexible option - you can choose to use a helper that ignores errors to turn it into a Seq[T], you can choose to use a helper that stops at the first error, you can stuff it into a range and handle the error…

But if the consensus is that this is not needed, okay.

ianlancetaylor · 2023-09-08T20:00:37Z

I'm not opposed to mapping functions that work with errors, but I think it would be premature to add them today.

AndrewHarrisSPU · 2023-09-08T20:21:21Z

@Merovius

Thanks for the provocative example. The combination of Seq2[T, error] and xiter-style composition is, to my mind, just underpowered. Another formulation with interesting properties:

Seq[T] -> context.Context -> Seq2[T, error]

Both possible values of the error term, context.Canceled and context.DeadlineExceeded, really suggest killing a pipeline. This contrasts with pipelines that might sensibly skip broken data, and presents a coloring problem at each stage.

I'm optimistic about ways to dress up Seq2[T, error] with a little more state to isolate the coloring problems, without further language extension. I don't know what the useful generalizations (if any) are, or when it's worth the effort. At the very least I think this formulation is a starting point for isolating errors:

Seq2[V, error] -> func(V, error) (V, error) -> Seq[V]

If I can provide a fallible sequence with an error handling function, I can isolate some expected errors and fail otherwise.

myaaaaaaaaa · 2023-11-23T04:37:43Z

I think it may be worth looking at this from a completely different angle:

jq is a DSL where iterators are first-class citizens, with syntax designed to be ideal for iterator chaining. Go already has several other DSLs in the standard library (regexp, text/template, etc) to deal with certain constructs that are intrinsically awkward to express in more general-purpose languages, and I believe that iterator chaining is one of those constructs.

A Go implementation can be found at https://github.com/itchyny/gojq . Its APIs are focused on map[string]any | []any | string | float64s due to the JSON focus of jq, but I think there would be great value in looking into ways to adapt it to be type-safe so that it would work more nicely with Go's types.

earthboundkid · 2023-11-23T14:33:05Z

Another reason not to name this package xiter is that since this was proposed, “xitter” has become a common nickname for Elon Musk’s social media site.

gophun · 2023-12-04T18:39:07Z

May I humbly suggest that the name "iterutils" is less susceptible to, uh, unfortunate mispronunciation.

Another reason not to name this package xiter is that since this was proposed, “xitter” has become a common nickname for Elon Musk’s social media site.

In English 'x' is pronounced /ks/, not /ʃ/. I think there would have to be intent behind mispronouncing it.

Also, 'liter' and 'litter' are two different words, and nobody would shudder at the thought of drinking a liter of water just because it's phonetically similar to 'litter'.

earthboundkid · 2023-12-04T19:05:45Z

An initial X is not common in English, but when it does occur, it is not pronounced as a KS sound like medial X. Xerox and xylophone are both Z sounds, which leads to the natural pronunciation "zitter". You also see Xi commonly called "Zee" by people who don't know Pinyin.

gophun · 2023-12-04T19:43:48Z

Okay, but a lot has to go wrong to accidentally change a voiced lenis alveolar fricative like /z/ into a voiceless fortis postalveolar fricative like /ʃ/. The more probable mispronunciation is /s/, leading to the word "sitter," which doesn't carry a negative connotation.

earthboundkid · 2023-12-04T21:08:19Z

Why deal with any of these problems? Just get a name that can't be mispronounced. iterx is fine, for example.

Nieskalany · 2023-12-06T14:09:37Z

I have a better idea: let's just not add this "feature". Go promises simplicity, and with new abstractions it breaks this promise. And with all respect, this should be the main point of discussion, not "naming".

earthboundkid · 2023-12-06T14:55:11Z

Go promises simplicity, and with new abstractions it breaks this promise.

Do you mean functions as iterators or this package? I think functions as iterators are a big change to the language but mostly a positive one. It's much too complex to make or use an iterator now. As for this package, I think it probably goes too far in the direction of library calls for what could just be a series of statements, but people are going to demand it, so might as well have a single semi-canonical version of it in golang.org/x.

myaaaaaaaaa · 2023-12-18T03:05:42Z

I think it may be worth looking at this from a completely different angle:

jq is a DSL where iterators are first-class citizens, with syntax designed to be ideal for iterator chaining.
...

To add to this, Wikipedia notes that jq is a pure functional language. When considering that jq's intended purpose is to query JSON, one comes to the realization that all query languages are pure functional languages (although obviously only when querying and not when updating: SQL SELECTs, GraphQL query{}s, etc).

It may just be that query languages are the ideal form of the functional programming paradigm: DSLs that are designed around easy composition of iterators, where side-effects don't exist due to being database read operations.

In that case, studying other query languages and designing a custom one tailored for Go's type system should be more fruitful than trying to shoehorn functional programming constructs directly into Go, of which the many difficulties are already discussed above. It would also allow for far more advanced iterator composition techniques than can be accomplished natively in Go.

For prior art on embedded query languages, see C#'s LINQ, which allows writing SQL-like expressions that, among other things, can be used to run SELECT statements on struct arrays. (Although note that LINQ involves keywords and expressions integrated directly into C#, whereas for Go I think it's preferable to have a DSL in a query package, akin to regexp or text/template)

forsaken628 · 2024-01-16T09:31:18Z

@DeedleFake

lines := bufio.Lines(r)
intlines := xiter.Map(lines, parseLine)
nonNegative := xiter.Filter(intlines, func(v int) bool { return v >= 0 })

Maybe some simple compiler magic is all we need.

usually

var buf *bytes.Buffer
buf.String()

but we can

var buf *bytes.Buffer
(*bytes.Buffer).String(buf)

Why not this: a syntax similar to a template pipeline?

var buf *bytes.Buffer
buf \ (*bytes.Buffer).String()

now we have

nonNegative  := bufio.Lines(r) \
 xiter.Map(parseLine) \
 xiter.Filter(func(v int) bool { return v >= 0 })

AndrewHarrisSPU · 2024-01-17T00:08:15Z

Would it be useful to do something like:

iter -> iter/itcore
xiter -> iter/itlib

with iter providing implementation from both? The adapter package (currently xiter) is a low common denominator but not universal; narrower projects can do things other ways. It might be nice to help them import just itcore.

golightlyb · 2024-02-15T01:22:25Z

Throwing in a particular shout for some form of Tee, because the simple naïve implementation is not as performant as a trickier implementation. As such it would be valuable to get it right, once, in the stdlib and optimise it there.

(I've mentioned this before somewhere, but can't remember/find which discussion)

jba · 2024-02-20T03:05:59Z

A while ago, I wrote a nonsensical signature for a function that produced an iterator with possible errors:

func ParseIntegers(seq iter.Seq[string]) (iter.Seq[int], error)

I now know what I meant to write:

func ParseIntegers(seq iter.Seq[string]) (iter.Seq[int], func() error)

See this comment, where I argue that returning an error function along with an iter.Seq[T] is usually better than using a pointer to an error or iter.Seq2[T, error].

rsc added the Proposal label Aug 9, 2023

gopherbot added this to the Proposal milestone Aug 9, 2023

rsc mentioned this issue Aug 9, 2023

iter: new package for iterators #61897

Open

randall77 mentioned this issue Dec 10, 2023

cmd/compile: pre-allocate a slice of provable length #64595

Open

neetle mentioned this issue Dec 12, 2023

Add ability to define custom predicate functions expr-lang/expr#497

Closed

randall77 mentioned this issue Dec 15, 2023

proposal: slices: convert one slice to another #64742

Closed

rsc mentioned this issue Jan 24, 2024

slices: add Chunk function to divide []T into [][]T chunks #53987

Open

nobishino mentioned this issue Feb 27, 2024

Proposal: (Period[T]) Periodic and similar methods returns iter.Seq[Time[T]] instead of channel Code-Hex/synchro#34

Open

Merovius mentioned this issue Feb 28, 2024

bytes, strings: add iterator forms of existing functions #61901

Open

This was referenced Mar 19, 2024

slices: add CloneFunc function #66404

Closed

proposal: slices: add Channel #66409

Closed

ianlancetaylor mentioned this issue Apr 3, 2024

proposal: spec: variadic type parameters #66651

Open

proposal: x/exp/xiter: new package with iterator adapters #61898

proposal: x/exp/xiter: new package with iterator adapters #61898

Comments

rsc commented Aug 9, 2023 • edited

gophun commented Aug 9, 2023

gophun commented Aug 9, 2023

zephyrtronium commented Aug 9, 2023

earthboundkid commented Aug 9, 2023

earthboundkid commented Aug 9, 2023

DeedleFake commented Aug 9, 2023 • edited

rsc commented Aug 9, 2023

DeedleFake commented Aug 10, 2023

ianlancetaylor commented Aug 10, 2023

hherman1 commented Aug 10, 2023

DeedleFake commented Aug 10, 2023 • edited

hherman1 commented Aug 10, 2023 • edited

benhoyt commented Aug 10, 2023

Merovius commented Aug 10, 2023 • edited

DeedleFake commented Aug 10, 2023 • edited

gophun commented Aug 10, 2023

Merovius commented Aug 10, 2023

DeedleFake commented Aug 10, 2023 • edited

gophun commented Aug 10, 2023

jba commented Aug 10, 2023

rsc commented Aug 10, 2023 • edited

Merovius commented Aug 10, 2023 • edited

DeedleFake commented Aug 10, 2023

thediveo commented Aug 10, 2023

jimmyfrasche commented Aug 10, 2023

jba commented Aug 10, 2023

jimmyfrasche commented Aug 10, 2023

earthboundkid commented Sep 8, 2023

Merovius commented Sep 8, 2023 • edited

gazerro commented Sep 8, 2023 • edited

jimmyfrasche commented Sep 8, 2023 • edited

ianlancetaylor commented Sep 8, 2023

Merovius commented Sep 8, 2023

ianlancetaylor commented Sep 8, 2023

AndrewHarrisSPU commented Sep 8, 2023

myaaaaaaaaa commented Nov 23, 2023

earthboundkid commented Nov 23, 2023

gophun commented Dec 4, 2023

earthboundkid commented Dec 4, 2023

gophun commented Dec 4, 2023

earthboundkid commented Dec 4, 2023

Nieskalany commented Dec 6, 2023

earthboundkid commented Dec 6, 2023

myaaaaaaaaa commented Dec 18, 2023

forsaken628 commented Jan 16, 2024

AndrewHarrisSPU commented Jan 17, 2024

golightlyb commented Feb 15, 2024

jba commented Feb 20, 2024

rsc commented Aug 9, 2023 •

edited

DeedleFake commented Aug 9, 2023 •

edited

DeedleFake commented Aug 10, 2023 •

edited

hherman1 commented Aug 10, 2023 •

edited

Merovius commented Aug 10, 2023 •

edited

DeedleFake commented Aug 10, 2023 •

edited

DeedleFake commented Aug 10, 2023 •

edited

rsc commented Aug 10, 2023 •

edited

Merovius commented Aug 10, 2023 •

edited

Merovius commented Sep 8, 2023 •

edited

gazerro commented Sep 8, 2023 •

edited

jimmyfrasche commented Sep 8, 2023 •

edited