8000 sort implementation by mike-ward · Pull Request #163 · vlang/coreutils · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

sort implementation #163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ compare against the true GNU coreutils version on the Linux-based tests first.
| | link | Make a hard link via the link syscall |
| ✓ | ln | Make links between files |
| ✓ | logname | Print current login name |
| ✓ | ls | List directory contents |
| | ls | List directory contents |
| ✓ | md5sum | Print or check MD5 digests |
| ✓ | mkdir | Make directories |
| | mkfifo | Make FIFOs (named pipes) |
Expand Down Expand Up @@ -130,7 +130,7 @@ compare against the true GNU coreutils version on the Linux-based tests first.
| | shred | Remove files more securely |
| ✓ | shuf | Shuffling text |
| ✓ | sleep | Delay for a specified time |
| | sort | Sort text files |
| ✓ | sort | Sort text files |
| | split | Split a file into pieces |
| ✓ | stat | Report file or file system status |
| | stdbuf | Run a command with modified I/O stream buffering |
Expand Down
Empty file removed src/sort/delete.me
Empty file.
129 changes: 129 additions & 0 deletions src/sort/options.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import common
import flag
import os
import time

const app_name = 'sort'

struct Options {
ignore_leading_blanks bool
dictionary_order bool
ignore_case bool
ignore_non_printing bool
numeric bool
reverse bool
// other optoins
check_diagnose bool
check_quiet bool
sort_keys []string
field_separator string = ' '
merge bool
output_file string
unique bool
files []string
}

fn get_options() Options {
mut fp := flag.new_flag_parser(os.args)
fp.application(app_name)
fp.version(common.coreutils_version())
fp.skip_executable()
fp.arguments_description('[FILE]')
fp.description('\nWrite sorted concatenation of all FILE(s) to standard output.' +
'\nWith no FILE, or when FILE is -, read standard input.')

ignore_leading_blanks := fp.bool('ignore-leading-blanks', `b`, false, 'ignore leading blanks')
dictionary_order := fp.bool('dictionary-order', `d`, false, 'consider only blanks and alphanumeric characters')
ignore_case := fp.bool('ignore-case', `f`, false, 'fold lower case to upper case characters')
ignore_non_printing := fp.bool('ignore-non-printing', `i`, false, 'consider only printable characters')
numeric := fp.bool('numeric-sort', `n`, false,
'Restrict the sort key to an initial numeric\n${flag.space}' +
'string, consisting of optional <blank> characters,\n${flag.space}' +
'optional <hyphen-minus> character, and zero or\n${flag.space}' +
'more digits, which shall be sorted by arithmetic\n${flag.space}' +
'value. An empty digit string shall be treated as\n${flag.space}' +
'zero. Leading zeros shall not affect ordering.')
reverse := fp.bool('reverse', `r`, false, 'reverse the result of comparisons\n\nOther options:')

check_diagnose := fp.bool('', `c`, false, 'check for sorted input; do not sort')
check_quiet := fp.bool('', `C`, false, 'like -c, but do not report first bad line')
sort_keys := fp.string_multi('key', `k`, 'sort via a key(s); <string> gives location and type')
merge := fp.bool('merge', `m`, false, 'merge already sorted files; do not sort')
field_separator := fp.string('', `t`, ' ', 'use <string> as field separator')
output_file := fp.string('output', `o`, '', 'write result to FILE instead of standard output')
unique := fp.bool('unique', `u`, false, 'with -c, check for strict ordering;\n${flag.space}' +
'without -c, output only the first of an equal run')

fp.footer("

KEYDEF is F[.C][OPTS][,F[.C][OPTS]] for start and stop position,
where F is a field number and C a character position in the
field; both are origin 1, and the stop position defaults to the
line's end. If neither -t nor -b is in effect, characters in a
field are counted from the beginning of the preceding whitespace.
OPTS is one or more single-letter ordering options [bdfir], which
override global ordering options for that key. If no key is
given, use the entire line as the key.".trim_indent())

fp.footer(common.coreutils_footer())
files := fp.finalize() or { exit_error(err.msg()) }

return Options{
ignore_leading_blanks: ignore_leading_blanks
dictionary_order: dictionary_order
ignore_case: ignore_case
ignore_non_printing: ignore_non_printing
numeric: numeric
reverse: reverse
// other options
check_diagnose: check_diagnose
check_quiet: check_quiet
sort_keys: sort_keys
field_separator: field_separator
merge: merge
output_file: output_file
unique: unique
files: scan_files_arg(files)
}
}

fn scan_files_arg(files_arg []string) []string {
mut files := []string{}
for file in files_arg {
if file == '-' {
files << stdin_to_tmp()
continue
}
files << file
}
if files.len == 0 {
files << stdin_to_tmp()
}
return files
}

const tmp_pattern = '/${app_name}-tmp-'

fn stdin_to_tmp() string {
tmp := '${os.temp_dir()}/${tmp_pattern}${time.ticks()}'
os.create(tmp) or { exit_error(err.msg()) }
mut f := os.open_append(tmp) or { exit_error(err.msg()) }
defer { f.close() }
for {
s := os.get_raw_line()
if s.len == 0 {
break
}
f.write_string(s) or { exit_error(err.msg()) }
}
return tmp
}

@[noreturn]
fn exit_error(msg string) {
if msg.len > 0 {
eprintln('${app_name}: ${error}')
}
eprintln("Try '${app_name} --help' for more information.")
exit(2) // exit(1) is used with the -c option
}
177 changes: 177 additions & 0 deletions src/sort/sort.v
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
import os
import arrays
import strconv

const space = ` `
const tab = `\t`

fn main() {
options := get_options()
results := sort(options)
if options.output_file == '' {
for result in results {
println(result)
}
} else {
os.write_lines(options.output_file, results) or { exit_error(err.msg()) }
}
}

fn sort(options Options) []string {
mut results := []string{}
for file in options.files {
results << do_sort(file, options)
}
return results
}

fn do_sort(file string, options Options) []string {
mut lines := os.read_lines(file) or { exit_error(err.msg()) }
original := if options.check_diagnose || options.check_quiet {
lines.clone()
} else {
[]string{}
}
match true {
// order matters here
options.sort_keys.len > 0 { sort_key(mut lines, options) }
options.numeric { sort_general_numeric(mut lines, options) }
options.ignore_case { sort_ignore_case(mut lines, options) }
options.dictionary_order { sort_dictionary_order(mut lines, options) }
options.ignore_non_printing { sort_ignore_non_printing(mut lines, options) }
options.ignore_leading_blanks { sort_ignore_leading_blanks(mut lines, options) }
else { sort_lines(mut lines, options) }
}
if options.unique {
lines = arrays.distinct(lines)
}
if original.len > 0 {
if lines != original {
if options.check_diagnose {
println('sort: not sorted')
}
exit(1)
} else {
if options.check_diagnose {
println('sort: already sorted')
}
exit(0)
}
}
return lines
}

fn sort_lines(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
return cmp(a, b)
})
}

fn compare_strings_reverse(a &string, b &string) int {
return compare_strings(b, a)
}

fn sort_ignore_case(mut lines []string, options Options) {
lines.sort_ignore_case()
if options.reverse {
lines.reverse_in_place()
}
}

// Ignore leading blanks when finding sort keys in each line.
// By default a blank is a space or a tab
fn sort_ignore_leading_blanks(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
return cmp(trim_leading_spaces(a), trim_leading_spaces(b))
})
}

fn trim_leading_spaces(s string) string {
return s.trim_left(' \n\t\v\f\r')
}

// Sort in phone directory order: ignore all characters except letters, digits
// and blanks when sorting. By default letters and digits are those of ASCII
fn sort_dictionary_order(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
aa := a.bytes().map(is_dictionary_char).bytestr()
bb := b.bytes().map(is_dictionary_char).bytestr()
return cmp(aa, bb)
})
}

fn is_dictionary_char(e u8) u8 {
return match e.is_digit() || e.is_letter() || e == space || e == tab {
true { e }
else { space }
}
}

// Sort numerically, converting a prefix of each line to a long double-precision
// floating point number. See Floating point numbers. Do not report overflow,
// underflow, or conversion errors. Use the following collating sequence:
// Lines that do not start with numbers (all considered to be equal).
// - NaNs (“Not a Number” values, in IEEE floating point arithmetic) in a
// consistent but machine-dependent order.
// - Minus infinity.
// - Finite numbers in ascending numeric order (with -0 and +0 equal).
// - Plus infinity
fn sort_general_numeric(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp, options] (a &string, b &string) int {
numeric_a, rest_a := numeric_rest(a)
numeric_b, rest_b := numeric_rest(b)
numeric_diff := if options.reverse { numeric_b - numeric_a } else { numeric_a - numeric_b }
return if numeric_diff != 0 {
if numeric_diff > 0 { 1 } else { -1 }
} else {
cmp(rest_a, rest_b)
}
})
}

const minus_infinity = f64(-0xFFFFFFFFFFFFFFF)

fn numeric_rest(s string) (f64, string) {
mut num := 0.0
mut rest := s
mut allow_blanks := true
mut allow_sign := true
for i := 0; i < s.len; i++ {
c := s[i]
if allow_blanks && c == space {
continue
}
if allow_sign && (c == `-` || c == `+`) {
allow_sign = false
allow_blanks = false
continue
}
if c.is_digit() || c == strconv.c_dpoint {
allow_sign = false
allow_blanks = false
continue
}
num = strconv.atof64(s[0..i]) or { minus_infinity }
rest = s[i..].clone()
}
return num, rest
}

// This option has no effect if the stronger --dictionary-order (-d) option
// is also given.
fn sort_ignore_non_printing(mut lines []string, options Options) {
cmp := if options.reverse { compare_strings_reverse } else { compare_strings }
lines.sort_with_compare(fn [cmp] (a &string, b &string) int {
aa := a.bytes().map(is_printable).bytestr()
bb := b.bytes().map(is_printable).bytestr()
return cmp(aa, bb)
})
}

fn is_printable(e u8) u8 {
return if e >= u8(` `) && e <= u8(`~`) { e } else { space }
}
Loading
Loading
0