Convert List Of Numbers To String Ranges

August 07, 2024 Post a Comment

I'd like to know if there is a simple (or already created) way of doing the opposite of this: Generate List of Numbers from Hyphenated.... This link could be used to do: >>

Solution 1:

One approach could be "eating" the input sequence piece by piece and storing the partial range results until you've got them all:

def formatter(start, end, step):
    """Render an arithmetic run as ``'start-end:step'`` (``end`` inclusive)."""
    # Half-open alternative, where the printed end is one step past the last
    # element:
    # return '{}-{}:{}'.format(start, end + step, step)
    return '{}-{}:{}'.format(start, end, step)


def helper(lst):
    """Peel the leading chunk off ``lst``.

    Returns a ``(text, rest)`` pair: ``text`` is the string form of the
    leading chunk and ``rest`` is the list of elements still to be processed.
    A chunk is either a single number or an arithmetic run of at least three
    numbers; a list of exactly two numbers is emitted as ``'a,b'``.

    ``lst`` must be non-empty.
    """
    if len(lst) == 1:
        return str(lst[0]), []
    if len(lst) == 2:
        return ','.join(map(str, lst)), []

    step = lst[1] - lst[0]
    # i is the index (in lst) of x, the left element of each consecutive pair
    # (x, y) = (lst[i], lst[i + 1]), starting from the second pair.
    for i, (x, y) in enumerate(zip(lst[1:], lst[2:]), 1):
        if y - x != step:
            if i > 1:
                # lst[0..i] is a run of three or more equally spaced numbers.
                return formatter(lst[0], lst[i], step), lst[i + 1:]
            # Only lst[0], lst[1] share the step: emit lst[0] alone and let
            # lst[1] start the next chunk.
            return str(lst[0]), lst[1:]
    return formatter(lst[0], lst[-1], step), []

def re_range(lst):
    """Convert a list of numbers into a comma-separated string of ranges.

    Repeatedly asks ``helper`` for the next chunk and the remaining list
    until nothing is left, then joins the chunks with commas.
    An empty list gives an empty string.
    """
    result = []
    while lst:
        partial, lst = helper(lst)
        result.append(partial)
    return ','.join(result)

I tested it with a bunch of unit tests and it passed them all. It can handle negative numbers too, but they'll look kind of ugly (it's not really anybody's fault).

Example:

>>> re_range([1,  4,5,6, 10, 15,16,17,18, 22, 25,26,27,28])
'1,4-6:1,10,15-18:1,22,25-28:1'
>>> re_range([1, 3, 5, 7, 8, 9, 10, 11, 13, 15, 17])
'1-7:2,8-11:1,13-17:2'

Note: I wrote the code for Python 3.

Performance

I didn't put any performance effort into the solution above. In particular, every time a list gets rebuilt with slicing, it might take some time if the input list has a particular shape. So, the first simple improvement would be using itertools.islice() where possible.

Anyway here's another implementation of the same algorithm, that scan through the input list with a scan index instead of slicing:

def re_range(lst):
    """Convert a list of numbers into a comma-separated string of ranges.

    Walks ``lst`` with a cursor instead of slicing it. Three or more equally
    spaced numbers are emitted through ``formatter``; anything else is
    emitted as individual numbers.
    """
    total = len(lst)
    pieces = []
    pos = 0
    while total - pos >= 3:
        delta = lst[pos + 1] - lst[pos]
        if lst[pos + 2] - lst[pos + 1] == delta:
            # At least three equally spaced numbers: extend the run as far
            # as the spacing holds.
            last = pos + 2
            while last < total - 1 and lst[last + 1] - lst[last] == delta:
                last += 1
            if last == total - 1:
                # The run reaches the end of the list; nothing is left over.
                pieces.append(formatter(lst[pos], lst[-1], delta))
                return ','.join(pieces)
            pieces.append(formatter(lst[pos], lst[last], delta))
            pos = last + 1
        else:
            # lst[pos] does not start a run of three: emit it on its own.
            pieces.append(str(lst[pos]))
            pos += 1

    leftover = total - pos
    if leftover == 2:
        pieces.append(','.join(map(str, lst[pos:])))
    elif leftover == 1:
        pieces.append(str(lst[pos]))

    return ','.join(pieces)

I stopped working on it once it got ~65% faster than the previous top solution, it seemed enough :)

Anyway I'd say that there might still be room for improvement (especially in the middle for-loop).

Solution 2:

This is a comparison of the 3 methods. Change the amount of data and the density via the values below...no matter what values I use, the first solution seems to be the quickest for me. For very large sets of data, the third solution becomes very slow.

EDITED

Edited to include comments below and add in a new solution. The last solution seems to be the quickest now.

import numpy as np
import itertools
import random
import timeit

# --- My Solution --------------------------------------------------------------
def list_to_ranges1(data):
    """Convert numbers to a string of half-open ranges, grouping by step.

    ``data`` is sorted first. Runs with step 1 are written ``'a-b'`` and other
    runs ``'a-b:step'``, where ``b`` is one step past the last element of the
    run. Isolated values are written as plain numbers. An empty input gives
    an empty string.
    """
    data = sorted(data)
    if not data:
        return ''
    diff_data = np.diff(data)
    ranges = []
    i = 0  # index in data of the first element not yet emitted
    skip_next = False
    # Stays empty when data has a single element (diff_data is then empty and
    # the loop never runs), so the tail check below emits that element.
    rng = []
    for _, iterable in itertools.groupby(diff_data):
        rng = list(iterable)
        step = rng[0]
        if skip_next:
            # The first difference of this group is the jump away from the
            # run just emitted, not part of a new run.
            skip_next = False
            rng.pop()

        if len(rng) == 0:
            continue
        elif len(rng) == 1:
            ranges.append('%d' % data[i])
        elif step == 1:
            ranges.append('%d-%d' % (data[i], data[i + len(rng)] + step))
            i += 1
            skip_next = True
        else:
            ranges.append('%d-%d:%d' % (data[i], data[i + len(rng)] + step, step))
            i += 1
            skip_next = True
        i += len(rng)

    # The last element is still pending unless the final group was a run.
    if len(rng) == 0 or len(rng) == 1:
        ranges.append('%d' % data[i])
    return ','.join(ranges)

# --- Kaidence Solution --------------------------------------------------------
# With a minor edit for use in range function
def list_to_ranges2(data):
    """Convert numbers to ``'start-end:step'`` pieces using second differences.

    A new piece starts wherever the second difference of ``data`` is non-zero.
    Each ``end`` is one step past the last element of the piece. An empty
    input gives an empty string and a single element is returned as a plain
    number.
    """
    if len(data) == 0:
        return ''
    if len(data) == 1:
        # No differences exist, so there is no step to report.
        return '%d' % data[0]

    onediff = np.diff(data)
    twodiff = np.diff(onediff)
    increments, breakingindices = [], []
    for i, change in enumerate(twodiff):
        if change != 0:
            breakingindices.append(i + 2)   # Correct index because of the two diffs
            increments.append(onediff[i])   # Record the increment for this section

    # Increments and breakingindices should be the same size
    str_list = []
    start = data[0]
    for index, increment in zip(breakingindices, increments):
        str_list.append("%d-%d:%d" % (start,
                                      data[index - 1] + increment,
                                      increment))
        start = data[index]
    str_list.append("%d-%d:%d" % (start,
                                  data[-1] + onediff[-1],
                                  onediff[-1]))
    return ','.join(str_list)

# --- Rik Poggi Solution -------------------------------------------------------
# With a minor edit for use in range function
def helper(lst):
    """Peel the leading chunk off ``lst`` and return ``(text, rest)``.

    ``text`` is a single number, ``'a,b'`` for a two-element list, or
    ``'start-end:step'`` for a run of three or more equally spaced numbers,
    with ``end`` one step past the last element of the run. ``rest`` is the
    list of elements still to be processed. ``lst`` must be non-empty.
    """
    if len(lst) == 1:
        return str(lst[0]), []
    if len(lst) == 2:
        return ','.join(map(str, lst)), []

    step = lst[1] - lst[0]
    # islice walks the list without copying it, unlike lst[1:] / lst[2:].
    # The builtin zip is lazy on Python 3 (itertools.izip no longer exists).
    for i, x, y in zip(itertools.count(1),
                       itertools.islice(lst, 1, None, 1),
                       itertools.islice(lst, 2, None, 1)):
        if y - x != step:
            if i > 1:
                return '{}-{}:{}'.format(lst[0], lst[i] + step, step), lst[i + 1:]
            else:
                return str(lst[0]), lst[1:]
    return '{}-{}:{}'.format(lst[0], lst[-1] + step, step), []

def list_to_ranges3(lst):
    """Convert a list of numbers into a comma-separated string of ranges.

    Repeatedly takes the next chunk from ``helper`` until the list is
    exhausted. An empty list gives an empty string.
    """
    result = []
    while lst:
        partial, lst = helper(lst)
        result.append(partial)
    return ','.join(result)

# --- Rik Poggi Solution 2 -----------------------------------------------------
def formatter(start, end, step):
    """Render a run as ``'start-stop:step'`` with ``stop = end + step``.

    ``end`` is the last element of the run; adding ``step`` makes the printed
    stop exclusive, so the piece can be fed straight to ``range``.
    """
    # Inclusive-end alternative:
    # return '{}-{}:{}'.format(start, end, step)
    return '{}-{}:{}'.format(start, end + step, step)

def list_to_ranges4(lst):
    """Convert a list of numbers into a comma-separated string of ranges.

    Scans ``lst`` with an index instead of slicing it. Runs of three or more
    equally spaced numbers are emitted through ``formatter``; everything else
    is emitted as individual numbers.
    """
    n = len(lst)
    result = []
    scan = 0  # index of the first element not yet emitted
    while n - scan > 2:
        step = lst[scan + 1] - lst[scan]
        if lst[scan + 2] - lst[scan + 1] != step:
            # lst[scan] does not start a run of three: emit it alone.
            result.append(str(lst[scan]))
            scan += 1
            continue

        for j in range(scan + 2, n - 1):
            if lst[j + 1] - lst[j] != step:
                result.append(formatter(lst[scan], lst[j], step))
                scan = j + 1
                break
        else:
            # The run extends to the end of the list.
            result.append(formatter(lst[scan], lst[-1], step))
            return ','.join(result)

    if n - scan == 1:
        result.append(str(lst[scan]))
    elif n - scan == 2:
        result.append(','.join(map(str, lst[scan:])))

    return ','.join(result)

# --- Test Function ------------------------------------------------------------
def test_data(data, f_to_test):
    """Expand the range string produced by ``f_to_test(data)`` back to a list.

    Each comma-separated piece is ``'n'``, ``'start-stop'`` or
    ``'start-stop:step'``; the latter two are expanded with ``range``, so
    ``stop`` is exclusive. Callers compare the result with ``data`` to check
    a round trip.

    Every ``'-'`` is treated as a separator, so strings containing negative
    numbers cannot be parsed.
    """
    data_str = f_to_test(data)
    _list = []
    for r in data_str.replace('-', ':').split(','):
        r = [int(a) for a in r.split(':')]
        if len(r) == 1:
            _list.extend(r)
        elif len(r) == 2:
            _list.extend(range(r[0], r[1]))
        else:
            _list.extend(range(r[0], r[1], r[2]))
    return _list

# --- Timing Tests -------------------------------------------------------------
# Generate some sample data...
data_list = []
for i in range(5):
    # Note: using the "4000" and "5000" values below, the relative density of
    # the data can be changed.  This has a huge effect on the results
    # (particularly on the results for list_to_ranges3, which re-slices the
    # list for every chunk it emits).
    data_list.append(sorted(list(set([random.randint(1, 4000) for a in
                                      range(random.randint(5, 5000))]))))

testfuncs = list_to_ranges1, list_to_ranges2, list_to_ranges3, list_to_ranges4
for f in testfuncs:
    print('\n' + f.__name__)
    for i, data in enumerate(data_list):
        # The setup string pulls the current loop variables out of __main__,
        # so this must be run as a script.
        t = timeit.Timer('f(data)', 'from __main__ import data, f')
        # print(f(data))
        # Columns: sample index, round-trip check, seconds for 200 calls.
        print(i, data == test_data(data, f), round(t.timeit(200), 3))

Solution 3:

This is most likely what you are looking for.

Edit: I see you already found the post. My apologies.

To help with the second part, I've tinkered a bit myself. This is what I came up with:

from numpy import diff

data = [1, 3, 5, 7, 8, 9, 10, 11, 13, 15, 17]
onediff, twodiff = diff(data), diff(diff(data))
increments, breakingindices = [], []
for i, change in enumerate(twodiff):
    if change != 0:
        breakingindices.append(i + 2)  # Correct index because of the two diffs
        increments.append(onediff[i])  # Record the increment for this section

# Increments and breakingindices should be the same size
# NOTE: every change of step opens a new piece, so an isolated jump between
# two runs with the same step (e.g. [1, 2, 3, 5, 6, 7]) produces a degenerate
# one-element piece such as '5-5:2'.
str_list = []
start = data[0]
for index, increment in zip(breakingindices, increments):
    str_list.append("%d-%d:%d" % (start, data[index - 1], increment))
    start = data[index]
str_list.append("%d-%d:%d" % (start, data[-1], onediff[-1]))
print(str_list)

For the given input list, this gives: ['1-7:2', '8-11:1', '13-17:2']. The code could do with a bit of cleanup, but this sorts with your problem assuming the grouping can be done sequentially.

{caution: for [1,2,3,5,6,7] this gives ['1-3:1', '5-5:2', '6-7:1'] instead of ['1-3:1', '5-7:1']}

Solution 4:

This is similar to versions that handle the step-size-of-one case enumerated here, but it also handles singletons (runs of no more than 2 elements, or repeated elements) and non-unitary step sizes (including negative step sizes). It also does not drop duplicates for lists like [1, 2, 3, 3, 4, 5].

As for runtime: it's done before you blink.

def ranges(L):
    """Yield singletons or ranges of integers, (first, last, step),
    as they occur sequentially in the list of integers, L.

    A range is yielded only for three or more equally spaced values with a
    non-zero step; ``last`` is inclusive. Shorter runs and repeated values
    are yielded as individual integers.

    Examples
    ========

    >>> list(ranges([1, 2, 4, 6, 7, 8, 10, 12, 13]))
    [1, (2, 6, 2), 7, (8, 12, 2), 13]
    >>> list(ranges([1,2,3,4,3,2,1,3,5,7,11,1,2,3]))
    [(1, 4, 1), (3, 1, -1), (3, 7, 2), 11, (1, 3, 1)]
    >>> list(ranges([3, 3, 3]))
    [3, 3, 3]

    """
    if not L:
        return
    r = []  # current run, stored as [first] or [first, last]
    for i in L:
        if len(r) < 2:
            r.append(i)
            if len(r) == 2:
                d = r[1] - r[0]
        elif d != 0 and i - r[1] == d:
            # Extend the run. A zero step is never extended: (first, last, 0)
            # cannot record how many repeats were absorbed, so repeated
            # values must stay as singletons.
            r[1] = i
        elif r[1] - r[0] == d:
            # The run holds exactly two values: emit the first as a singleton
            # and let the second start a new candidate run with i.
            yield r.pop(0)
            r.append(i)
            d = r[1] - r[0]
        else:
            yield tuple(r + [d])
            r[:] = [i]
    if len(r) == 1:
        yield r.pop()
    elif r[1] - r[0] == d:
        # Only two values left: not enough for a range.
        for i in r:
            yield i
    else:
        yield tuple(r + [d])

The raw output can be modified as desired, e.g. actual range instances can be created.

def sranges(i):
    """Return a pretty string for the output of ``ranges(i)``.

    Singletons are printed as numbers. Each ``(first, last, step)`` tuple is
    printed as a ``range(...)`` call whose stop is one step past ``last``;
    the step is omitted when it is 1, and the start too when it is 0.

    Examples
    ========

    >>> sranges([1,2,4,6,7,8,10,12,13,15,16,17])
    '1, range(2, 8, 2), 7, range(8, 14, 2), 13, range(15, 18)'

    """
    out = []
    for item in ranges(i):
        if not isinstance(item, tuple):
            out.append(str(item))
        elif item[-1] == 1:
            if item[0] == 0:
                out.append('range(%s)' % (item[1] + 1))
            else:
                out.append('range(%s, %s)' % (item[0], item[1] + 1))
        else:
            out.append('range(%s, %s, %s)' % (item[0], item[1] + item[2], item[2]))
    return ', '.join(out)

Solution 5:

This function should do what you need without requiring any imports.

def listToRanges(self, intList):
    """Return ``intList`` as a comma-separated string of consecutive runs.

    The values are sorted, then grouped into runs where each value is exactly
    one more than the previous. A run of one value is printed as ``'n'`` and a
    longer run as ``'first-last'`` (inclusive). ``self`` is not used.
    """
    ret = []
    for val in sorted(intList):
        if not ret or ret[-1][-1] + 1 != val:
            ret.append([val])
        else:
            ret[-1].append(val)
    return ",".join([str(x[0]) if len(x) == 1 else str(x[0]) + "-" + str(x[-1])
                     for x in ret])

Python Tutorial for Beginners